diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml index 89ad036616..063ea26483 100644 --- a/.github/workflows/test-pr.yml +++ b/.github/workflows/test-pr.yml @@ -7,6 +7,7 @@ on: pull_request: branches: - master + - path-base-implementing concurrency: group: ${{ github.head_ref || github.run_id }} diff --git a/accounts/abi/bind/backends/simulated.go b/accounts/abi/bind/backends/simulated.go index 999365734c..60e39a34e8 100644 --- a/accounts/abi/bind/backends/simulated.go +++ b/accounts/abi/bind/backends/simulated.go @@ -43,6 +43,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rpc" + "github.com/ethereum/go-ethereum/trie" ) // This nil assignment ensures at compile time that SimulatedBackend implements bind.ContractBackend. @@ -77,8 +78,8 @@ type SimulatedBackend struct { // A simulated backend always uses chainID 1337. func NewSimulatedBackendWithDatabase(database ethdb.Database, alloc core.GenesisAlloc, gasLimit uint64) *SimulatedBackend { genesis := core.Genesis{Config: params.AllEthashProtocolChanges, GasLimit: gasLimit, Alloc: alloc} - genesis.MustCommit(database) - blockchain, _ := core.NewBlockChain(database, nil, genesis.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + genesis.MustCommit(database, trie.NewDatabase(database, trie.HashDefaults)) + blockchain, _ := core.NewBlockChain(database, nil, &genesis, nil, ethash.NewFaker(), vm.Config{}, nil, nil) backend := &SimulatedBackend{ database: database, @@ -180,7 +181,6 @@ func (b *SimulatedBackend) CodeAt(ctx context.Context, contract common.Address, if err != nil { return nil, err } - return stateDB.GetCode(contract), nil } @@ -193,7 +193,6 @@ func (b *SimulatedBackend) BalanceAt(ctx context.Context, contract common.Addres if err != nil { return nil, err } - return stateDB.GetBalance(contract), nil } @@ -206,7 +205,6 @@ func (b *SimulatedBackend) NonceAt(ctx context.Context, contract common.Address, if err != nil { return 0, err } - return stateDB.GetNonce(contract), nil } @@ -219,7 +217,6 @@ func (b *SimulatedBackend) StorageAt(ctx context.Context, contract common.Addres if err != nil { return nil, err } - val := stateDB.GetState(contract, key) return val[:], nil } @@ -666,7 +663,10 @@ func (b *SimulatedBackend) SendTransaction(ctx context.Context, tx *types.Transa } block.AddTxWithChain(b.blockchain, tx) }, true) - stateDB, _ := b.blockchain.State() + stateDB, err := b.blockchain.State() + if err != nil { + return err + } b.pendingBlock = blocks[0] b.pendingState, _ = state.New(b.pendingBlock.Root(), stateDB.Database(), nil) @@ -781,11 +781,13 @@ func (b *SimulatedBackend) AdjustTime(adjustment time.Duration) error { blocks, _ := core.GenerateChain(b.config, b.blockchain.CurrentBlock(), ethash.NewFaker(), b.database, 1, func(number int, block *core.BlockGen) { block.OffsetTime(int64(adjustment.Seconds())) }, true) - stateDB, _ := b.blockchain.State() + stateDB, err := b.blockchain.State() + if err != nil { + return err + } b.pendingBlock = blocks[0] b.pendingState, _ = state.New(b.pendingBlock.Root(), stateDB.Database(), nil) - return nil } diff --git a/cmd/devp2p/internal/ethtest/chain.go b/cmd/devp2p/internal/ethtest/chain.go index 7dcb412b53..d75d8d0a45 100644 --- a/cmd/devp2p/internal/ethtest/chain.go +++ b/cmd/devp2p/internal/ethtest/chain.go @@ -133,7 +133,7 @@ func loadChain(chainfile string, genesis string) (*Chain, error) { if err != nil { return nil, err } - gblock := gen.ToBlock(nil) + gblock := gen.ToBlock() blocks, err := blocksFromFile(chainfile, gblock) if err != nil { diff --git a/cmd/devp2p/internal/ethtest/suite_test.go b/cmd/devp2p/internal/ethtest/suite_test.go index 50380b989c..4754e81e1b 100644 --- a/cmd/devp2p/internal/ethtest/suite_test.go +++ b/cmd/devp2p/internal/ethtest/suite_test.go @@ -91,8 +91,6 @@ func setupGeth(stack *node.Node) error { Genesis: &chain.genesis, NetworkId: chain.genesis.Config.ChainID.Uint64(), // 19763 DatabaseCache: 10, - TrieCleanCache: 10, - TrieCleanCacheJournal: "", TrieCleanCacheRejournal: 60 * time.Minute, TrieDirtyCache: 16, TrieTimeout: 60 * time.Minute, @@ -101,6 +99,7 @@ func setupGeth(stack *node.Node) error { if err != nil { return err } + backend.SetSynced() _, err = backend.BlockChain().InsertChain(chain.blocks[1:], nil) return err diff --git a/cmd/evm/internal/t8ntool/execution.go b/cmd/evm/internal/t8ntool/execution.go index 5a75d142b4..b3773b9fa2 100644 --- a/cmd/evm/internal/t8ntool/execution.go +++ b/cmd/evm/internal/t8ntool/execution.go @@ -242,7 +242,7 @@ func (pre *Prestate) Apply(vmConfig vm.Config, chainConfig *params.ChainConfig, statedb.AddBalance(pre.Env.Coinbase, minerReward) } // Commit block - root, err := statedb.Commit(chainConfig.IsEIP158(vmContext.BlockNumber)) + root, err := statedb.Commit(vmContext.BlockNumber.Uint64(), chainConfig.IsEIP158(vmContext.BlockNumber)) if err != nil { fmt.Fprintf(os.Stderr, "Could not commit state: %v", err) return nil, nil, NewError(ErrorEVM, fmt.Errorf("could not commit state: %v", err)) @@ -258,11 +258,17 @@ func (pre *Prestate) Apply(vmConfig vm.Config, chainConfig *params.ChainConfig, Difficulty: (*math.HexOrDecimal256)(vmContext.Difficulty), GasUsed: (math.HexOrDecimal64)(gasUsed), } + // Re-create statedb instance with new root upon the updated database + // for accessing latest states. + statedb, err = state.New(root, statedb.Database(), nil) + if err != nil { + return nil, nil, NewError(ErrorEVM, fmt.Errorf("could not reopen state: %v", err)) + } return statedb, execRs, nil } func MakePreState(db ethdb.Database, accounts core.GenesisAlloc) *state.StateDB { - sdb := state.NewDatabase(db) + sdb := state.NewDatabaseWithConfig(db, &trie.Config{Preimages: true}) statedb, _ := state.New(common.Hash{}, sdb, nil) for addr, a := range accounts { statedb.SetCode(addr, a.Code) @@ -273,7 +279,7 @@ func MakePreState(db ethdb.Database, accounts core.GenesisAlloc) *state.StateDB } } // Commit and re-open to start with a clean state. - root, _ := statedb.Commit(false) + root, _ := statedb.Commit(0, false) statedb, _ = state.New(root, sdb, nil) return statedb } diff --git a/cmd/evm/runner.go b/cmd/evm/runner.go index 9ee248f0f7..4a060f3e11 100644 --- a/cmd/evm/runner.go +++ b/cmd/evm/runner.go @@ -20,8 +20,6 @@ import ( "bytes" "encoding/json" "fmt" - "github.com/ethereum/go-ethereum/eth/tracers/logger" - "github.com/ethereum/go-ethereum/internal/flags" "io/ioutil" "math/big" "os" @@ -30,12 +28,18 @@ import ( "testing" "time" + "github.com/ethereum/go-ethereum/eth/tracers/logger" + "github.com/ethereum/go-ethereum/internal/flags" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/cmd/evm/internal/compiler" "github.com/ethereum/go-ethereum/cmd/utils" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/core/vm/runtime" "github.com/ethereum/go-ethereum/log" @@ -138,11 +142,22 @@ func runCmd(ctx *cli.Context) error { gen := readGenesis(ctx.String(GenesisFlag.Name)) genesisConfig = gen db := rawdb.NewMemoryDatabase() - genesis := gen.ToBlock(db) - statedb, _ = state.New(genesis.Root(), state.NewDatabase(db), nil) + triedb := trie.NewDatabase(db, &trie.Config{ + HashDB: hashdb.Defaults, + }) + defer triedb.Close() + genesis := gen.MustCommit(db, triedb) + sdb := state.NewDatabaseWithNodeDB(db, triedb) + statedb, _ = state.New(genesis.Root(), sdb, nil) chainConfig = gen.Config } else { - statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) + db := rawdb.NewMemoryDatabase() + triedb := trie.NewDatabase(db, &trie.Config{ + HashDB: hashdb.Defaults, + }) + defer triedb.Close() + sdb := state.NewDatabaseWithNodeDB(db, triedb) + statedb, _ = state.New(types.EmptyRootHash, sdb, nil) genesisConfig = new(core.Genesis) } if ctx.String(SenderFlag.Name) != "" { @@ -269,7 +284,7 @@ func runCmd(ctx *cli.Context) error { output, leftOverGas, stats, err := timedExec(bench, execFunc) if ctx.Bool(DumpFlag.Name) { - statedb.Commit(true) + statedb.Commit(0, true) statedb.IntermediateRoot(true) fmt.Println(string(statedb.Dump(nil))) } diff --git a/cmd/evm/staterunner.go b/cmd/evm/staterunner.go index 0e05237343..506f789a8a 100644 --- a/cmd/evm/staterunner.go +++ b/cmd/evm/staterunner.go @@ -20,11 +20,14 @@ import ( "encoding/json" "errors" "fmt" - "github.com/ethereum/go-ethereum/eth/tracers/logger" "io/ioutil" "os" + "github.com/ethereum/go-ethereum/eth/tracers/logger" + + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" + "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/tests" @@ -96,21 +99,22 @@ func stateTestCmd(ctx *cli.Context) error { results := make([]StatetestResult, 0, len(tests)) for key, test := range tests { for _, st := range test.Subtests() { + dump := ctx.Bool(DumpFlag.Name) // Run the test and aggregate the result result := &StatetestResult{Name: key, Fork: st.Fork, Pass: true} - _, s, err := test.Run(st, cfg, false) - // print state root for evmlab tracing - if ctx.Bool(MachineFlag.Name) && s != nil { - fmt.Fprintf(os.Stderr, "{\"stateRoot\": \"%x\"}\n", s.IntermediateRoot(false)) - } - if err != nil { - // Test failed, mark as so and dump any state to aid debugging - result.Pass, result.Error = false, err.Error() - if ctx.Bool(DumpFlag.Name) && s != nil { - dump := s.RawDump(nil) - result.State = &dump + test.Run(st, cfg, false, rawdb.HashScheme, func(err error, snaps *snapshot.Tree, state *state.StateDB) { + if err != nil { + // Test failed, mark as so and dump any state to aid debugging + result.Pass, result.Error = false, err.Error() + if dump { + dump := state.RawDump(nil) + result.State = &dump + } } - } + if ctx.Bool(MachineFlag.Name) && state != nil { + fmt.Fprintf(os.Stderr, "{\"stateRoot\": \"%x\"}\n", state.IntermediateRoot(false)) + } + }) results = append(results, *result) diff --git a/cmd/faucet/faucet.go b/cmd/faucet/faucet.go index 5ec4960080..4ae98ed605 100644 --- a/cmd/faucet/faucet.go +++ b/cmd/faucet/faucet.go @@ -251,7 +251,7 @@ func newFaucet(genesis *core.Genesis, port int, enodes []*enode.Node, network ui cfg.SyncMode = downloader.LightSync cfg.NetworkId = network cfg.Genesis = genesis - utils.SetDNSDiscoveryDefaults(&cfg, genesis.ToBlock(nil).Hash()) + utils.SetDNSDiscoveryDefaults(&cfg, genesis.ToBlock().Hash()) lesBackend, err := les.New(stack, &cfg) if err != nil { diff --git a/cmd/ronin/chaincmd.go b/cmd/ronin/chaincmd.go index b53a5c8951..128b9b3c9f 100644 --- a/cmd/ronin/chaincmd.go +++ b/cmd/ronin/chaincmd.go @@ -51,6 +51,9 @@ var ( utils.DataDirFlag, utils.DBEngineFlag, utils.ForceOverrideChainConfigFlag, + utils.CachePreimagesFlag, + utils.StateSchemeFlag, + utils.AncientFlag, }, Category: "BLOCKCHAIN COMMANDS", Description: ` @@ -105,6 +108,9 @@ The dumpgenesis command dumps the genesis block configuration in JSON format to utils.MetricsInfluxDBBucketFlag, utils.MetricsInfluxDBOrganizationFlag, utils.TxLookupLimitFlag, + utils.TransactionHistoryFlag, + utils.StateSchemeFlag, + utils.StateHistoryFlag, }, Category: "BLOCKCHAIN COMMANDS", Description: ` @@ -124,6 +130,7 @@ processing will proceed even if an individual RLP-file import failure occurs.`, utils.DBEngineFlag, utils.CacheFlag, utils.SyncModeFlag, + utils.StateSchemeFlag, }, Category: "BLOCKCHAIN COMMANDS", Description: ` @@ -220,15 +227,18 @@ func initGenesis(ctx *cli.Context) error { defer stack.Close() for _, name := range []string{"chaindata", "lightchaindata"} { - chaindb, err := stack.OpenDatabase(name, 0, 0, "", false) + chaindb, err := stack.OpenDatabaseWithFreezer(name, 0, 0, ctx.String(utils.AncientFlag.Name), "", false) if err != nil { utils.Fatalf("Failed to open database: %v", err) } - _, hash, err := core.SetupGenesisBlock(chaindb, genesis, overrideChainConfig) + // Create triedb firstly + + triedb := utils.MakeTrieDatabase(ctx, chaindb, ctx.Bool(utils.CachePreimagesFlag.Name), false) + defer chaindb.Close() + _, hash, err := core.SetupGenesisBlock(chaindb, triedb, genesis, overrideChainConfig) if err != nil { utils.Fatalf("Failed to write genesis block: %v", err) } - chaindb.Close() log.Info("Successfully wrote genesis state", "database", name, "hash", hash) } return nil @@ -466,7 +476,10 @@ func dump(ctx *cli.Context) error { if err != nil { return err } - state, err := state.New(root, state.NewDatabase(db), nil) + triedb := utils.MakeTrieDatabase(ctx, db, true, false) // always enable preimage lookup + defer triedb.Close() + state, err := state.New(root, state.NewDatabaseWithNodeDB(db, triedb), nil) + if err != nil { return err } diff --git a/cmd/ronin/config.go b/cmd/ronin/config.go index cae2e3bc30..d3bf084fbb 100644 --- a/cmd/ronin/config.go +++ b/cmd/ronin/config.go @@ -287,6 +287,10 @@ func deprecated(field string) bool { return true case "ethconfig.Config.EWASMInterpreter": return true + case "ethconfig.Config.TrieCleanCacheJournal": + return true + case "ethconfig.Config.TrieCleanCacheRejournal": + return true default: return false } diff --git a/cmd/ronin/dbcmd.go b/cmd/ronin/dbcmd.go index 56fe03a3bd..1de4223294 100644 --- a/cmd/ronin/dbcmd.go +++ b/cmd/ronin/dbcmd.go @@ -23,7 +23,6 @@ import ( "os" "os/signal" "path/filepath" - "sort" "strconv" "strings" "syscall" @@ -181,7 +180,7 @@ WARNING: This is a low-level operation which may cause database corruption!`, Action: dbDumpTrie, Name: "dumptrie", Usage: "Show the storage key/values of a given storage trie", - ArgsUsage: " ", + ArgsUsage: " ", Flags: []cli.Flag{ utils.DataDirFlag, utils.DBEngineFlag, @@ -191,14 +190,15 @@ WARNING: This is a low-level operation which may cause database corruption!`, utils.SepoliaFlag, utils.RinkebyFlag, utils.GoerliFlag, + utils.StateSchemeFlag, }, Description: "This command looks up the specified database key from the database.", } dbDumpFreezerIndex = &cli.Command{ Action: freezerInspect, Name: "freezer-index", - Usage: "Dump out the index of a given freezer type", - ArgsUsage: " ", + Usage: "Dump out the index of a specific freezer table", + ArgsUsage: " ", Flags: []cli.Flag{ utils.DataDirFlag, utils.DBEngineFlag, @@ -248,60 +248,73 @@ WARNING: This is a low-level operation which may cause database corruption!`, func removeDB(ctx *cli.Context) error { stack, config := makeConfigNode(ctx) - // Remove the full node state database - path := stack.ResolvePath("chaindata") - if common.FileExist(path) { - confirmAndRemoveDB(path, "full node state database") - } else { - log.Info("Full node state database missing", "path", path) - } - // Remove the full node ancient database - path = config.Eth.DatabaseFreezer + // Resolve folder paths. + var ( + rootDir = stack.ResolvePath("chaindata") + ancientDir = config.Eth.DatabaseFreezer + ) switch { - case path == "": - path = filepath.Join(stack.ResolvePath("chaindata"), "ancient") - case !filepath.IsAbs(path): - path = config.Node.ResolvePath(path) - } - if common.FileExist(path) { - confirmAndRemoveDB(path, "full node ancient database") - } else { - log.Info("Full node ancient database missing", "path", path) - } - // Remove the light node database - path = stack.ResolvePath("lightchaindata") - if common.FileExist(path) { - confirmAndRemoveDB(path, "light node database") - } else { - log.Info("Light node database missing", "path", path) - } + case ancientDir == "": + ancientDir = filepath.Join(stack.ResolvePath("chaindata"), "ancient") + case !filepath.IsAbs(ancientDir): + ancientDir = config.Node.ResolvePath(ancientDir) + } + // Delete state data + statePaths := []string{rootDir, filepath.Join(ancientDir, rawdb.StateFreezerName)} + confirmAndRemoveDB(statePaths, "state data") + + // Delete ancient chain + chainPaths := []string{filepath.Join(ancientDir, rawdb.ChainFreezerName)} + confirmAndRemoveDB(chainPaths, "ancient chain") return nil } +// removeFolder deletes all files (not folders) inside the directory 'dir' (but +// not files in subfolders). +func removeFolder(dir string) { + filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + // If we're at the top level folder, recurse into + if path == dir { + return nil + } + // Delete all the files, but not subfolders + if !info.IsDir() { + os.Remove(path) + return nil + } + return filepath.SkipDir + }) +} + // confirmAndRemoveDB prompts the user for a last confirmation and removes the -// folder if accepted. -func confirmAndRemoveDB(database string, kind string) { - confirm, err := prompt.Stdin.PromptConfirm(fmt.Sprintf("Remove %s (%s)?", kind, database)) +// list of folders if accepted. +func confirmAndRemoveDB(paths []string, kind string) { + msg := fmt.Sprintf("Location(s) of '%s': \n", kind) + for _, path := range paths { + msg += fmt.Sprintf("\t- %s\n", path) + } + fmt.Println(msg) + + confirm, err := prompt.Stdin.PromptConfirm(fmt.Sprintf("Remove '%s'?", kind)) switch { case err != nil: utils.Fatalf("%v", err) case !confirm: - log.Info("Database deletion skipped", "path", database) + log.Info("Database deletion skipped", "kind", kind, "paths", paths) default: - start := time.Now() - filepath.Walk(database, func(path string, info os.FileInfo, err error) error { - // If we're at the top level folder, recurse into - if path == database { - return nil + var ( + deleted []string + start = time.Now() + ) + for _, path := range paths { + if common.FileExist(path) { + removeFolder(path) + deleted = append(deleted, path) + } else { + log.Info("Folder is not existent", "path", path) } - // Delete all the files, but not subfolders - if !info.IsDir() { - os.Remove(path) - return nil - } - return filepath.SkipDir - }) - log.Info("Database successfully deleted", "path", database, "elapsed", common.PrettyDuration(time.Since(start))) + } + log.Info("Database successfully deleted", "kind", kind, "paths", deleted, "elapsed", common.PrettyDuration(time.Since(start))) } } @@ -311,7 +324,7 @@ func inspect(ctx *cli.Context) error { start []byte ) if ctx.NArg() > 2 { - return fmt.Errorf("Max 2 arguments: %v", ctx.Command.ArgsUsage) + return fmt.Errorf("max 2 arguments: %v", ctx.Command.ArgsUsage) } if ctx.NArg() >= 1 { if d, err := hexutil.Decode(ctx.Args().Get(0)); err != nil { @@ -336,7 +349,7 @@ func inspect(ctx *cli.Context) error { return rawdb.InspectDatabase(db, prefix, start) } -func showLeveldbStats(db ethdb.Stater) { +func showLeveldbStats(db ethdb.KeyValueStater) { if stats, err := db.Stat("leveldb.stats"); err != nil { log.Warn("Failed to read database stats", "error", err) } else { @@ -469,7 +482,7 @@ func dbPut(ctx *cli.Context) error { // dbDumpTrie shows the key-value slots of a given storage trie func dbDumpTrie(ctx *cli.Context) error { - if ctx.NArg() < 1 { + if ctx.NArg() < 3 { return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage) } stack, _ := makeConfigNode(ctx) @@ -477,35 +490,51 @@ func dbDumpTrie(ctx *cli.Context) error { db := utils.MakeChainDatabase(ctx, stack, true) defer db.Close() + triedb := utils.MakeTrieDatabase(ctx, db, false, true) + defer triedb.Close() var ( - root []byte - start []byte - max = int64(-1) - err error + state []byte + storage []byte + account []byte + start []byte + max = int64(-1) + err error ) - if root, err = hexutil.Decode(ctx.Args().Get(0)); err != nil { - log.Info("Could not decode the root", "error", err) + if state, err = hexutil.Decode(ctx.Args().Get(0)); err != nil { + log.Info("Could not decode the state", "error", err) return err } - stRoot := common.BytesToHash(root) - if ctx.NArg() >= 2 { - if start, err = hexutil.Decode(ctx.Args().Get(1)); err != nil { + if account, err = hexutil.Decode(ctx.Args().Get(1)); err != nil { + log.Info("Could not decode the account hash", "error", err) + return err + } + if storage, err = hexutil.Decode(ctx.Args().Get(2)); err != nil { + log.Info("Could not decode the storage trie root", "error", err) + return err + } + if ctx.NArg() > 3 { + if start, err = hexutil.Decode(ctx.Args().Get(3)); err != nil { log.Info("Could not decode the seek position", "error", err) return err } } - if ctx.NArg() >= 3 { - if max, err = strconv.ParseInt(ctx.Args().Get(2), 10, 64); err != nil { + if ctx.NArg() > 4 { + if max, err = strconv.ParseInt(ctx.Args().Get(4), 10, 64); err != nil { log.Info("Could not decode the max count", "error", err) return err } } - theTrie, err := trie.New(stRoot, trie.NewDatabase(db)) + id := trie.StorageTrieID(common.BytesToHash(state), common.BytesToHash(account), common.BytesToHash(storage)) + theTrie, err := trie.New(id, triedb) + if err != nil { + return err + } + trieIt, err := theTrie.NodeIterator(start) if err != nil { return err } var count int64 - it := trie.NewIterator(theTrie.NodeIterator(start)) + it := trie.NewIterator(trieIt) for it.Next() { if max > 0 && count == max { fmt.Printf("Exiting after %d values\n", count) @@ -519,25 +548,19 @@ func dbDumpTrie(ctx *cli.Context) error { func freezerInspect(ctx *cli.Context) error { var ( - start, end int64 - disableSnappy bool - err error + start, end int64 + err error ) - if ctx.NArg() < 3 { + if ctx.NArg() < 4 { return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage) } - kind := ctx.Args().Get(0) - if noSnap, ok := rawdb.FreezerNoSnappy[kind]; !ok { - var options []string - for opt := range rawdb.FreezerNoSnappy { - options = append(options, opt) - } - sort.Strings(options) - return fmt.Errorf("Could read freezer-type '%v'. Available options: %v", kind, options) - } else { - disableSnappy = noSnap - } - if start, err = strconv.ParseInt(ctx.Args().Get(1), 10, 64); err != nil { + + var ( + freezerType = ctx.Args().Get(0) + tableType = ctx.Args().Get(1) + ) + + if start, err = strconv.ParseInt(ctx.Args().Get(2), 10, 64); err != nil { log.Info("Could read start-param", "error", err) return err } @@ -545,16 +568,20 @@ func freezerInspect(ctx *cli.Context) error { log.Info("Could read count param", "error", err) return err } + stack, _ := makeConfigNode(ctx) defer stack.Close() - path := filepath.Join(stack.ResolvePath("chaindata"), "ancient") - log.Info("Opening freezer", "location", path, "name", kind) - if f, err := rawdb.NewFreezerTable(path, kind, disableSnappy); err != nil { - return err - } else { - f.DumpIndex(start, end) + // Open the Freezer Database with mode read-only + db := utils.MakeChainDatabase(ctx, stack, true) + defer db.Close() + + ancient, err := db.AncientDatadir() + if err != nil { + log.Info("Failed to retrive ancient root", "err", err) } - return nil + + return rawdb.InspectFreezerTable(ancient, freezerType, tableType, start, end) + } // ParseHexOrString tries to hexdecode b, but if the prefix is missing, it instead just returns the raw bytes diff --git a/cmd/ronin/genesis_test.go b/cmd/ronin/genesis_test.go index 2d4cb6a3ac..41b50b45d7 100644 --- a/cmd/ronin/genesis_test.go +++ b/cmd/ronin/genesis_test.go @@ -173,12 +173,12 @@ func TestCustomBackend(t *testing.T) { { // Can't start pebble on top of leveldb initArgs: []string{"--db.engine", "leveldb"}, execArgs: []string{"--db.engine", "pebble"}, - execExpect: `Fatal: Failed to register the Ethereum service: db.engine choice was pebble but found pre-existing leveldb database in specified data directory`, + execExpect: `Fatal: Could not open database: db.engine choice was pebble but found pre-existing leveldb database in specified data directory`, }, { // Can't start leveldb on top of pebble initArgs: []string{"--db.engine", "pebble"}, execArgs: []string{"--db.engine", "leveldb"}, - execExpect: `Fatal: Failed to register the Ethereum service: db.engine choice was leveldb but found pre-existing pebble database in specified data directory`, + execExpect: `Fatal: Could not open database: db.engine choice was leveldb but found pre-existing pebble database in specified data directory`, }, { // Reject invalid backend choice initArgs: []string{"--db.engine", "mssql"}, diff --git a/cmd/ronin/main.go b/cmd/ronin/main.go index 5144bc14a9..057080de40 100644 --- a/cmd/ronin/main.go +++ b/cmd/ronin/main.go @@ -101,7 +101,10 @@ var ( utils.ExitWhenSyncedFlag, utils.GCModeFlag, utils.SnapshotFlag, - utils.TxLookupLimitFlag, + utils.TxLookupLimitFlag, // deprecated + utils.TransactionHistoryFlag, + utils.StateSchemeFlag, + utils.StateHistoryFlag, utils.TriesInMemoryFlag, utils.LightServeFlag, utils.LightIngressFlag, @@ -118,8 +121,8 @@ var ( utils.CacheFlag, utils.CacheDatabaseFlag, utils.CacheTrieFlag, - utils.CacheTrieJournalFlag, - utils.CacheTrieRejournalFlag, + utils.CacheTrieJournalFlag, // deprecated + utils.CacheTrieRejournalFlag, // deprecated utils.CacheGCFlag, utils.CacheSnapshotFlag, utils.CacheNoPrefetchFlag, diff --git a/cmd/ronin/snapshot.go b/cmd/ronin/snapshot.go index 904bb72b99..23fd7d19dc 100644 --- a/cmd/ronin/snapshot.go +++ b/cmd/ronin/snapshot.go @@ -66,7 +66,6 @@ var ( utils.SepoliaFlag, utils.RinkebyFlag, utils.GoerliFlag, - utils.CacheTrieJournalFlag, utils.BloomFilterSizeFlag, }, Description: ` @@ -78,10 +77,7 @@ two version states are available: genesis and the specific one. The default pruning target is the HEAD-127 state. -WARNING: It's necessary to delete the trie clean cache after the pruning. -If you specify another directory for the trie clean cache via "--cache.trie.journal" -during the use of Geth, please also specify it here for correct deletion. Otherwise -the trie clean cache with default directory will be deleted. +WARNING: it's only supported in hash mode(--state.scheme=hash)". `, }, { @@ -98,6 +94,7 @@ the trie clean cache with default directory will be deleted. utils.SepoliaFlag, utils.RinkebyFlag, utils.GoerliFlag, + utils.StateSchemeFlag, }, Description: ` geth snapshot verify-state @@ -120,6 +117,7 @@ In other words, this command does the snapshot to trie conversion. utils.SepoliaFlag, utils.RinkebyFlag, utils.GoerliFlag, + utils.StateSchemeFlag, }, Description: ` geth snapshot traverse-state @@ -144,6 +142,7 @@ It's also usable without snapshot enabled. utils.SepoliaFlag, utils.RinkebyFlag, utils.GoerliFlag, + utils.StateSchemeFlag, }, Description: ` geth snapshot traverse-rawstate @@ -173,6 +172,7 @@ It's also usable without snapshot enabled. utils.ExcludeStorageFlag, utils.StartKeyFlag, utils.DumpLimitFlag, + utils.StateSchemeFlag, }, Description: ` This command is semantically equivalent to 'geth dump', but uses the snapshots @@ -186,12 +186,18 @@ block is used. } ) +// Deprecation: this command should be deprecated once the hash-based +// scheme is deprecated. func pruneState(ctx *cli.Context) error { - stack, config := makeConfigNode(ctx) + stack, _ := makeConfigNode(ctx) defer stack.Close() chaindb := utils.MakeChainDatabase(ctx, stack, false) - pruner, err := pruner.NewPruner(chaindb, stack.ResolvePath(""), stack.ResolvePath(config.Eth.TrieCleanCacheJournal), ctx.Uint64(utils.BloomFilterSizeFlag.Name)) + if rawdb.ReadStateScheme(chaindb) != rawdb.HashScheme { + log.Crit("Offline pruning is not required for path scheme") + } + pruner, err := pruner.NewPruner(chaindb, stack.ResolvePath(""), + ctx.Uint64(utils.BloomFilterSizeFlag.Name)) if err != nil { log.Error("Failed to open snapshot tree", "err", err) return err @@ -225,7 +231,10 @@ func verifyState(ctx *cli.Context) error { log.Error("Failed to load head block") return errors.New("no head block") } - snaptree, err := snapshot.New(chaindb, trie.NewDatabase(chaindb), 256, headBlock.Root(), false, false, false) + triedb := utils.MakeTrieDatabase(ctx, chaindb, false, true) + defer triedb.Close() + + snaptree, err := snapshot.New(chaindb, triedb, 256, headBlock.Root(), false, false, false) if err != nil { log.Error("Failed to open snapshot tree", "err", err) return err @@ -258,6 +267,10 @@ func traverseState(ctx *cli.Context) error { defer stack.Close() chaindb := utils.MakeChainDatabase(ctx, stack, true) + defer chaindb.Close() + + triedb := utils.MakeTrieDatabase(ctx, chaindb, false, true) + defer triedb.Close() headBlock := rawdb.ReadHeadBlock(chaindb) if headBlock == nil { log.Error("Failed to load head block") @@ -282,8 +295,7 @@ func traverseState(ctx *cli.Context) error { root = headBlock.Root() log.Info("Start traversing the state", "root", root, "number", headBlock.NumberU64()) } - triedb := trie.NewDatabase(chaindb) - t, err := trie.NewSecure(root, triedb) + t, err := trie.NewSecure(trie.StateTrieID(root), triedb) if err != nil { log.Error("Failed to open trie", "root", root, "err", err) return err @@ -295,7 +307,12 @@ func traverseState(ctx *cli.Context) error { lastReport time.Time start = time.Now() ) - accIter := trie.NewIterator(t.NodeIterator(nil)) + acctIt, err := t.NodeIterator(nil) + if err != nil { + log.Error("Failed to open iterator", "root", root, "err", err) + return err + } + accIter := trie.NewIterator(acctIt) for accIter.Next() { accounts += 1 var acc types.StateAccount @@ -304,12 +321,17 @@ func traverseState(ctx *cli.Context) error { return err } if acc.Root != emptyRoot { - storageTrie, err := trie.NewSecure(acc.Root, triedb) + storageTrie, err := trie.NewSecure(trie.StorageTrieID(root, common.BytesToHash(accIter.Key), acc.Root), triedb) if err != nil { log.Error("Failed to open storage trie", "root", acc.Root, "err", err) return err } - storageIter := trie.NewIterator(storageTrie.NodeIterator(nil)) + storageIt, err := storageTrie.NodeIterator(nil) + if err != nil { + log.Error("Failed to open storage iterator", "root", acc.Root, "err", err) + return err + } + storageIter := trie.NewIterator(storageIt) for storageIter.Next() { slots += 1 } @@ -348,6 +370,10 @@ func traverseRawState(ctx *cli.Context) error { defer stack.Close() chaindb := utils.MakeChainDatabase(ctx, stack, true) + defer chaindb.Close() + + triedb := utils.MakeTrieDatabase(ctx, chaindb, false, true) + defer triedb.Close() headBlock := rawdb.ReadHeadBlock(chaindb) if headBlock == nil { log.Error("Failed to load head block") @@ -372,8 +398,7 @@ func traverseRawState(ctx *cli.Context) error { root = headBlock.Root() log.Info("Start traversing the state", "root", root, "number", headBlock.NumberU64()) } - triedb := trie.NewDatabase(chaindb) - t, err := trie.NewSecure(root, triedb) + t, err := trie.NewSecure(trie.StateTrieID(root), triedb) if err != nil { log.Error("Failed to open trie", "root", root, "err", err) return err @@ -386,16 +411,25 @@ func traverseRawState(ctx *cli.Context) error { lastReport time.Time start = time.Now() ) - accIter := t.NodeIterator(nil) + accIter, err := t.NodeIterator(nil) + if err != nil { + log.Error("Failed to open iterator", "root", root, "err", err) + return err + } + // using reader. + //reader, err := triedb.Reader(root) + if err != nil { + log.Error("State is non-existent", "root", root) + return nil + } for accIter.Next(true) { nodes += 1 node := accIter.Hash() if node != (common.Hash{}) { - // Check the present for non-empty hash node(embedded node doesn't + // Check the presence for non-empty hash node(embedded node doesn't // have their own hash). - blob := rawdb.ReadTrieNode(chaindb, node) - if len(blob) == 0 { + if !rawdb.HasLegacyTrieNode(chaindb, node) { log.Error("Missing trie node(account)", "hash", node) return errors.New("missing account") } @@ -410,12 +444,16 @@ func traverseRawState(ctx *cli.Context) error { return errors.New("invalid account") } if acc.Root != emptyRoot { - storageTrie, err := trie.NewSecure(acc.Root, triedb) + storageTrie, err := trie.NewSecure(trie.StorageTrieID(root, common.BytesToHash(accIter.LeafKey()), acc.Root), triedb) if err != nil { log.Error("Failed to open storage trie", "root", acc.Root, "err", err) return errors.New("missing storage trie") } - storageIter := storageTrie.NodeIterator(nil) + storageIter, err := storageTrie.NodeIterator(nil) + if err != nil { + log.Error("Failed to open storage iterator", "root", acc.Root, "err", err) + return err + } for storageIter.Next(true) { nodes += 1 node := storageIter.Hash() @@ -423,8 +461,7 @@ func traverseRawState(ctx *cli.Context) error { // Check the present for non-empty hash node(embedded node doesn't // have their own hash). if node != (common.Hash{}) { - blob := rawdb.ReadTrieNode(chaindb, node) - if len(blob) == 0 { + if !rawdb.HasLegacyTrieNode(chaindb, node) { log.Error("Missing trie node(storage)", "hash", node) return errors.New("missing storage") } @@ -477,7 +514,9 @@ func dumpState(ctx *cli.Context) error { if err != nil { return err } - snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, root, false, false, false) + triedb := utils.MakeTrieDatabase(ctx, db, false, true) + defer triedb.Close() + snaptree, err := snapshot.New(db, triedb, 256, root, false, false, false) if err != nil { return err } @@ -498,14 +537,14 @@ func dumpState(ctx *cli.Context) error { Root common.Hash `json:"root"` }{root}) for accIt.Next() { - account, err := snapshot.FullAccount(accIt.Account()) + account, err := types.FullAccount(accIt.Account()) if err != nil { return err } da := &state.DumpAccount{ Balance: account.Balance.String(), Nonce: account.Nonce, - Root: account.Root, + Root: account.Root.Bytes(), CodeHash: account.CodeHash, SecureKey: accIt.Hash().Bytes(), } diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 1a5377eeb0..8f480b9c17 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -31,6 +31,9 @@ import ( "time" "github.com/ethereum/go-ethereum/internal/flags" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" pcsclite "github.com/gballet/go-libpcsclite" gopsutil "github.com/shirou/gopsutil/mem" @@ -92,7 +95,7 @@ var ( } AncientFlag = &flags.DirectoryFlag{ Name: "datadir.ancient", - Usage: "Data directory for ancient chain segments (default = inside chaindata)", + Usage: "Root directory for ancient data (default = inside chaindata)", Category: flags.EthCategory, } MinFreeDiskSpaceFlag = &flags.DirectoryFlag{ @@ -225,18 +228,30 @@ var ( Value: "full", Category: flags.StateCategory, } + StateSchemeFlag = &cli.StringFlag{ + Name: "state.scheme", + Usage: `State scheme to use for trie storage ("hash" or "path")`, + Value: rawdb.HashScheme, // Default to hash scheme + Category: flags.StateCategory, + } + StateHistoryFlag = &cli.Uint64Flag{ + Name: "history.state", + Usage: "Number of recent blocks to retain state history for (default = 90,000 blocks, 0 = entire chain)", + Value: ethconfig.Defaults.StateHistory, + Category: flags.StateCategory, + } + TransactionHistoryFlag = &cli.Uint64Flag{ + Name: "history.transactions", + Usage: "Number of recent blocks to maintain transactions index for (default = about one year, 0 = entire chain)", + Value: ethconfig.Defaults.TransactionHistory, + Category: flags.StateCategory, + } SnapshotFlag = &cli.BoolFlag{ Name: "snapshot", Usage: `Enables snapshot-database mode (default = enable)`, Value: true, Category: flags.EthCategory, } - TxLookupLimitFlag = &cli.Uint64Flag{ - Name: "txlookuplimit", - Usage: "Number of recent blocks to maintain transactions index for (default = about one year, 0 = entire chain)", - Value: ethconfig.Defaults.TxLookupLimit, - Category: flags.EthCategory, - } TriesInMemoryFlag = &cli.IntFlag{ Name: "triesinmemory", Usage: "The number of tries is kept in memory before pruning (default = 128)", @@ -479,18 +494,6 @@ var ( Value: 15, Category: flags.PerfCategory, } - CacheTrieJournalFlag = &cli.StringFlag{ - Name: "cache.trie.journal", - Usage: "Disk journal directory for trie cache to survive node restarts", - Value: ethconfig.Defaults.TrieCleanCacheJournal, - Category: flags.PerfCategory, - } - CacheTrieRejournalFlag = &cli.DurationFlag{ - Name: "cache.trie.rejournal", - Usage: "Time interval to regenerate the trie cache journal", - Value: ethconfig.Defaults.TrieCleanCacheRejournal, - Category: flags.PerfCategory, - } CacheGCFlag = &cli.IntFlag{ Name: "cache.gc", Usage: "Percentage of cache memory allowance to use for trie pruning (default = 25% full mode, 0% archive mode)", @@ -1847,17 +1850,12 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *ethconfig.Config) { CheckExclusive(ctx, MainnetFlag, DeveloperFlag, RopstenFlag, RinkebyFlag, GoerliFlag, SepoliaFlag) CheckExclusive(ctx, LightServeFlag, SyncModeFlag, "light") CheckExclusive(ctx, DeveloperFlag, ExternalSignerFlag) // Can't use both ephemeral unlocked and external signer - if ctx.String(GCModeFlag.Name) == "archive" && ctx.Uint64(TxLookupLimitFlag.Name) != 0 { - ctx.Set(TxLookupLimitFlag.Name, "0") - log.Warn("Disable transaction unindexing for archive node") - } - if ctx.IsSet(LightServeFlag.Name) && ctx.Uint64(TxLookupLimitFlag.Name) != 0 { - log.Warn("LES server cannot serve old transaction status and cannot connect below les/4 protocol version if transaction lookup index is limited") - } + var ks *keystore.KeyStore if keystores := stack.AccountManager().Backends(keystore.KeyStoreType); len(keystores) > 0 { ks = keystores[0].(*keystore.KeyStore) } + // Set configurations from CLI flags. setEtherbase(ctx, ks, cfg) setGPO(ctx, &cfg.GPO, ctx.String(SyncModeFlag.Name) == "light") setTxPool(ctx, &cfg.TxPool) @@ -1916,17 +1914,44 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *ethconfig.Config) { cfg.Preimages = true log.Info("Enabling recording of key preimages since archive mode is used") } - if ctx.IsSet(TxLookupLimitFlag.Name) { - cfg.TxLookupLimit = ctx.Uint64(TxLookupLimitFlag.Name) + if ctx.IsSet(StateHistoryFlag.Name) { + cfg.StateHistory = ctx.Uint64(StateHistoryFlag.Name) } - if ctx.IsSet(CacheFlag.Name) || ctx.IsSet(CacheTrieFlag.Name) { - cfg.TrieCleanCache = ctx.Int(CacheFlag.Name) * ctx.Int(CacheTrieFlag.Name) / 100 + + /* State Scheme Config logic */ + // Parse the state scheme from chaindb firstly. + chaindb := tryMakeReadOnlyDatabase(ctx, stack) + scheme, err := ParseStateScheme(ctx, chaindb) + chaindb.Close() + if err != nil { + Fatalf("%v", err) } - if ctx.IsSet(CacheTrieJournalFlag.Name) { - cfg.TrieCleanCacheJournal = ctx.String(CacheTrieJournalFlag.Name) + cfg.StateScheme = scheme + + // Parse transaction history flag, if user is still using legacy config + // file with 'TxLookupLimit' configured, copy the value to 'TransactionHistory'. + if cfg.TransactionHistory == ethconfig.Defaults.TransactionHistory && cfg.TxLookupLimit != ethconfig.Defaults.TxLookupLimit { + log.Warn("The config option 'TxLookupLimit' is deprecated and will be removed, please use 'TransactionHistory'") + cfg.TransactionHistory = cfg.TxLookupLimit } - if ctx.IsSet(CacheTrieRejournalFlag.Name) { - cfg.TrieCleanCacheRejournal = ctx.Duration(CacheTrieRejournalFlag.Name) + + if ctx.IsSet(TransactionHistoryFlag.Name) { + cfg.TransactionHistory = ctx.Uint64(TransactionHistoryFlag.Name) + } else if ctx.IsSet(TxLookupLimitFlag.Name) { + log.Warn("The flag --txlookuplimit is deprecated and will be removed, please use --history.transactions") + cfg.TransactionHistory = ctx.Uint64(TransactionHistoryFlag.Name) + } + + if ctx.String(GCModeFlag.Name) == "archive" && cfg.TransactionHistory != 0 { + cfg.TransactionHistory = 0 + log.Warn("Disabled transaction unindexing for archive node") + } + if ctx.IsSet(LightServeFlag.Name) && cfg.TransactionHistory != 0 { + log.Warn("LES server cannot serve old transaction status and cannot connect below les/4 protocol version if transaction lookup index is limited") + } + + if ctx.IsSet(CacheFlag.Name) || ctx.IsSet(CacheTrieFlag.Name) { + cfg.TrieCleanCache = ctx.Int(CacheFlag.Name) * ctx.Int(CacheTrieFlag.Name) / 100 } if ctx.IsSet(CacheFlag.Name) || ctx.IsSet(CacheGCFlag.Name) { cfg.TrieDirtyCache = ctx.Int(CacheFlag.Name) * ctx.Int(CacheGCFlag.Name) / 100 @@ -2272,7 +2297,8 @@ func MakeGenesis(ctx *cli.Context) *core.Genesis { func MakeChain(ctx *cli.Context, stack *node.Node) (chain *core.BlockChain, chainDb ethdb.Database) { var err error chainDb = MakeChainDatabase(ctx, stack, false) // TODO(rjl493456442) support read-only database - config, _, err := core.SetupGenesisBlock(chainDb, MakeGenesis(ctx), false) + gpec := MakeGenesis(ctx) + config, err := core.LoadChainConfig(chainDb, gpec) if err != nil { Fatalf("%v", err) } @@ -2331,7 +2357,7 @@ func MakeChain(ctx *cli.Context, stack *node.Node) (chain *core.BlockChain, chai // TODO(rjl493456442) disable snapshot generation/wiping if the chain is read only. // Disable transaction indexing/unindexing by default. - chain, err = core.NewBlockChain(chainDb, cache, config, engine, vmcfg, nil, nil) + chain, err = core.NewBlockChain(chainDb, cache, gpec, nil, engine, vmcfg, nil, nil) if err != nil { Fatalf("Can't create BlockChain: %v", err) } @@ -2373,3 +2399,76 @@ func MakeConsolePreloads(ctx *cli.Context) []string { } return preloads } + +// tryMakeReadOnlyDatabase try to open the chain database in read-only mode, +// or fallback to write mode if the database is not initialized. +func tryMakeReadOnlyDatabase(ctx *cli.Context, stack *node.Node) ethdb.Database { + // If datadir doesn't exist we need to open db in write-mode + // so database engine can create files. + readonly := true + if !common.FileExist(stack.ResolvePath("chaindata")) { + readonly = false + } + return MakeChainDatabase(ctx, stack, readonly) +} + +// ParseStateScheme resolves scheme identifier from CLI flag. If the provided +// state scheme is not compatible with the one of persistent scheme, an error +// will be returned +// +// - none: use the scheme consistent with persistent state, or fallback +// to hash-based scheme if state is empty. +// - hash: use hash-based scheme or error out if not compatible with +// persistent state scheme. +// - path: use path-based scheme or error out if not compatible with +// persistent state scheme. +func ParseStateScheme(ctx *cli.Context, disk ethdb.Database) (string, error) { + // If state scheme is not specified, use the scheme consistent + // with persistent state, or fallback to hash mode if database + // is empty. + stored := rawdb.ReadStateScheme(disk) + + // If the flag is not set. + if !ctx.IsSet(StateSchemeFlag.Name) { + if stored == "" { + // If the database is empty, use hash-based scheme. + log.Info("State schema set to default when database is empty", "scheme", "hash") + return rawdb.HashScheme, nil + } + log.Info("State scheme set to already existing", "scheme", stored) + return stored, nil + } + // If state scheme is specified, ensure it's compatible with + // persistent state. + scheme := ctx.String(StateSchemeFlag.Name) + if stored == "" || scheme == stored { + log.Info("State scheme set by user", "scheme", scheme) + return scheme, nil + } + return "", fmt.Errorf("incompatible state scheme, stored: %s, provided: %s", stored, scheme) +} + +// MakeTrieDatabase constructs a trie database based on the configured scheme. +func MakeTrieDatabase(ctx *cli.Context, disk ethdb.Database, preimage bool, readOnly bool) *trie.Database { + config := &trie.Config{ + Preimages: preimage, + } + scheme, err := ParseStateScheme(ctx, disk) + if err != nil { + Fatalf("%v", err) + } + // Readonly only support in PathScheme + if scheme == rawdb.HashScheme { + // Read-only mode is not implemented in hash mode, + // ignore the parameter silently. TODO(rjl493456442) + // please config it if read mode is implemented. + config.HashDB = hashdb.Defaults + return trie.NewDatabase(disk, config) + } + if readOnly { + config.PathDB = pathdb.ReadOnly + } else { + config.PathDB = pathdb.Defaults + } + return trie.NewDatabase(disk, config) +} diff --git a/cmd/utils/flags_legacy.go b/cmd/utils/flags_legacy.go index cafa07892d..8aa77e1ba0 100644 --- a/cmd/utils/flags_legacy.go +++ b/cmd/utils/flags_legacy.go @@ -20,6 +20,7 @@ import ( "fmt" "github.com/ethereum/go-ethereum/eth/ethconfig" + "github.com/ethereum/go-ethereum/internal/flags" "github.com/urfave/cli/v2" ) @@ -35,6 +36,9 @@ var ShowDeprecated = &cli.Command{ var DeprecatedFlags = []cli.Flag{ LegacyMinerGasTargetFlag, NoUSBFlag, + CacheTrieJournalFlag, + CacheTrieRejournalFlag, + TxLookupLimitFlag, } var ( @@ -49,6 +53,24 @@ var ( Usage: "Target gas floor for mined blocks (deprecated)", Value: ethconfig.Defaults.Miner.GasFloor, } + // (Deprecated Oct 2024, shown in aliased flags section) + CacheTrieJournalFlag = &cli.StringFlag{ + Name: "cache.trie.journal", + Usage: "Disk journal directory for trie cache to survive node restarts", + Category: flags.PerfCategory, + } + CacheTrieRejournalFlag = &cli.DurationFlag{ + Name: "cache.trie.rejournal", + Usage: "Time interval to regenerate the trie cache journal", + Category: flags.PerfCategory, + } + // Deprecated Nov 2024 + TxLookupLimitFlag = &cli.Uint64Flag{ + Name: "txlookuplimit", + Usage: "Number of recent blocks to maintain transactions index for (default = about one year, 0 = entire chain) (deprecated, use history.transactions instead)", + Value: ethconfig.Defaults.TransactionHistory, + Category: flags.DeprecatedCategory, + } ) // showDeprecated displays deprecated flags that will be soon removed from the codebase. diff --git a/common/types.go b/common/types.go index 88cda0a841..7779e800a7 100644 --- a/common/types.go +++ b/common/types.go @@ -49,6 +49,9 @@ const ( var ( hashT = reflect.TypeOf(Hash{}) addressT = reflect.TypeOf(Address{}) + + // MaxHash represents the maximum possible hash value. + MaxHash = HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") ) // Hash represents the 32 byte Keccak256 hash of arbitrary data. @@ -79,6 +82,11 @@ func (h Hash) Big() *big.Int { return new(big.Int).SetBytes(h[:]) } // Hex converts a hash to a hex string. func (h Hash) Hex() string { return hexutil.Encode(h[:]) } +// Cmp compares two hashes. +func (h Hash) Cmp(other Hash) int { + return bytes.Compare(h[:], other[:]) +} + // TerminalString implements log.TerminalStringer, formatting a string for console // output during logging. func (h Hash) TerminalString() string { diff --git a/consensus/clique/clique_test.go b/consensus/clique/clique_test.go index 1e11fe0d70..744d84595f 100644 --- a/consensus/clique/clique_test.go +++ b/consensus/clique/clique_test.go @@ -27,6 +27,7 @@ import ( "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) // This test case is a repro of an annoying bug that took us forever to catch. @@ -45,6 +46,7 @@ func TestReimportMirroredState(t *testing.T) { signer = new(types.HomesteadSigner) ) genspec := &core.Genesis{ + Config: params.AllCliqueProtocolChanges, ExtraData: make([]byte, extraVanity+common.AddressLength+extraSeal), Alloc: map[common.Address]core.GenesisAccount{ addr: {Balance: big.NewInt(10000000000000000)}, @@ -52,10 +54,10 @@ func TestReimportMirroredState(t *testing.T) { BaseFee: big.NewInt(params.InitialBaseFee), } copy(genspec.ExtraData[extraVanity:], addr[:]) - genesis := genspec.MustCommit(db) + genesis := genspec.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) // Generate a batch of blocks, each properly signed - chain, _ := core.NewBlockChain(db, nil, params.AllCliqueProtocolChanges, engine, vm.Config{}, nil, nil) + chain, _ := core.NewBlockChain(db, nil, genspec, nil, engine, vm.Config{}, nil, nil) defer chain.Stop() blocks, _ := core.GenerateChain(params.AllCliqueProtocolChanges, genesis, engine, db, 3, func(i int, block *core.BlockGen) { @@ -87,9 +89,9 @@ func TestReimportMirroredState(t *testing.T) { } // Insert the first two blocks and make sure the chain is valid db = rawdb.NewMemoryDatabase() - genspec.MustCommit(db) + genspec.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) - chain, _ = core.NewBlockChain(db, nil, params.AllCliqueProtocolChanges, engine, vm.Config{}, nil, nil) + chain, _ = core.NewBlockChain(db, nil, genspec, nil, engine, vm.Config{}, nil, nil) defer chain.Stop() if _, err := chain.InsertChain(blocks[:2], nil); err != nil { @@ -102,7 +104,7 @@ func TestReimportMirroredState(t *testing.T) { // Simulate a crash by creating a new chain on top of the database, without // flushing the dirty states out. Insert the last block, triggering a sidechain // reimport. - chain, _ = core.NewBlockChain(db, nil, params.AllCliqueProtocolChanges, engine, vm.Config{}, nil, nil) + chain, _ = core.NewBlockChain(db, nil, genspec, nil, engine, vm.Config{}, nil, nil) defer chain.Stop() if _, err := chain.InsertChain(blocks[2:], nil); err != nil { diff --git a/consensus/clique/snapshot_test.go b/consensus/clique/snapshot_test.go index 72e719a3f6..09d8aaaf5d 100644 --- a/consensus/clique/snapshot_test.go +++ b/consensus/clique/snapshot_test.go @@ -30,6 +30,7 @@ import ( "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) // testerAccountPool is a pool to maintain currently active tester accounts, @@ -403,7 +404,7 @@ func TestClique(t *testing.T) { } // Create a pristine blockchain with the genesis injected db := rawdb.NewMemoryDatabase() - genesis.Commit(db) + genesisBlock := genesis.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) // Assemble a chain of headers from the cast votes config := *params.TestChainConfig @@ -411,10 +412,11 @@ func TestClique(t *testing.T) { Period: 1, Epoch: tt.epoch, } + genesis.Config = &config engine := New(config.Clique, db) engine.fakeDiff = true - blocks, _ := core.GenerateChain(&config, genesis.ToBlock(db), engine, db, len(tt.votes), func(j int, gen *core.BlockGen) { + blocks, _ := core.GenerateChain(&config, genesisBlock, engine, db, len(tt.votes), func(j int, gen *core.BlockGen) { // Cast the vote contained in this block gen.SetCoinbase(accounts.address(tt.votes[j].voted)) if tt.votes[j].auth { @@ -450,7 +452,7 @@ func TestClique(t *testing.T) { batches[len(batches)-1] = append(batches[len(batches)-1], block) } // Pass all the headers through clique and ensure tallying succeeds - chain, err := core.NewBlockChain(db, nil, &config, engine, vm.Config{}, nil, nil) + chain, err := core.NewBlockChain(db, nil, genesis, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Errorf("test %d: failed to create test chain: %v", i, err) continue diff --git a/consensus/consortium/v1/consortium.go b/consensus/consortium/v1/consortium.go index f208275857..5114586a0c 100644 --- a/consensus/consortium/v1/consortium.go +++ b/consensus/consortium/v1/consortium.go @@ -308,9 +308,13 @@ func (c *Consortium) verifyCascadingFields(chain consensus.ChainHeaderReader, he func (c *Consortium) snapshot(chain consensus.ChainHeaderReader, number uint64, hash common.Hash, parents []*types.Header) (*Snapshot, error) { // Search for a snapshot in memory or on disk for checkpoints var ( - headers []*types.Header - snap *Snapshot + headers []*types.Header + snap *Snapshot + cpyParents = make([]*types.Header, len(parents)) ) + // We must copy parents before going to the loop because parents are modified. + // If not, the FindAncientHeader function can not find its block ancestor + copy(cpyParents, parents) for snap == nil { // If an in-memory snapshot was found, use that if s, ok := c.recents.Get(hash); ok { @@ -334,11 +338,10 @@ func (c *Consortium) snapshot(chain consensus.ChainHeaderReader, number uint64, if cpHeader != nil { hash := cpHeader.Hash() - validators, err := c.getValidatorsFromContract(chain, number) + validators, err := c.getValidatorsFromGenesis() if err != nil { return nil, err } - snap = newSnapshot(c.config, c.signatures, number, hash, validators) if err := snap.store(c.db); err != nil { return nil, err @@ -370,7 +373,7 @@ func (c *Consortium) snapshot(chain consensus.ChainHeaderReader, number uint64, for i := 0; i < len(headers)/2; i++ { headers[i], headers[len(headers)-1-i] = headers[len(headers)-1-i], headers[i] } - snap, err := snap.apply(chain, c, headers, parents) + snap, err := snap.apply(chain, c, headers, cpyParents) if err != nil { return nil, err } @@ -752,6 +755,31 @@ func (c *Consortium) doCalcDifficulty(signer common.Address, number uint64, vali return new(big.Int).Set(diffNoTurn) } +// getValidatorsFromGenesis gets the list of validators from the genesis block support backward compatibility in v1, only used with Snap Sync. +func (c *Consortium) getValidatorsFromGenesis() ([]common.Address, error) { + var validatorSet []string + switch { + case c.chainConfig.ChainID.Cmp(big.NewInt(2020)) == 0: + validatorSet = []string{ + "0x000000000000000000000000f224beff587362a88d859e899d0d80c080e1e812", + "0x00000000000000000000000011360eacdedd59bc433afad4fc8f0417d1fbebab", + "0x00000000000000000000000070bb1fb41c8c42f6ddd53a708e2b82209495e455", + } + case c.chainConfig.ChainID.Cmp(big.NewInt(2021)) == 0: + validatorSet = []string{ + "0x0000000000000000000000004a4bc674a97737376cfe990ae2fe0d2b6e738393", + "0x000000000000000000000000b6bc5bc0410773a3f86b1537ce7495c52e38f88b", + } + default: + return nil, errors.New("no validator set for this chain only support Mainnet & Testnet") + } + var addresses []common.Address + for _, str := range validatorSet { + addresses = append(addresses, common.HexToAddress(str)) + } + return addresses, nil +} + // Read the validator list from contract func (c *Consortium) getValidatorsFromContract(chain consensus.ChainHeaderReader, number uint64) ([]common.Address, error) { if chain.Config().IsFenix(big.NewInt(int64(number))) { @@ -775,7 +803,7 @@ func (c *Consortium) getValidatorsFromLastCheckpoint(chain consensus.ChainHeader if lastCheckpoint == 0 { // TODO(andy): Review if we should put validators in genesis block's extra data - return c.getValidatorsFromContract(chain, number) + return c.getValidatorsFromGenesis() } var header *types.Header diff --git a/consensus/consortium/v2/consortium_test.go b/consensus/consortium/v2/consortium_test.go index 1f580f917a..dda675e15f 100644 --- a/consensus/consortium/v2/consortium_test.go +++ b/consensus/consortium/v2/consortium_test.go @@ -1433,6 +1433,10 @@ func TestAssembleFinalityVoteTripp(t *testing.T) { } func TestVerifyVote(t *testing.T) { + testVeiryVote(t, rawdb.PathScheme) + testVeiryVote(t, rawdb.HashScheme) +} +func testVeiryVote(t *testing.T, scheme string) { const numValidator = 3 var err error @@ -1453,11 +1457,12 @@ func TestVerifyVote(t *testing.T) { } db := rawdb.NewMemoryDatabase() - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: params.TestChainConfig, BaseFee: big.NewInt(params.InitialBaseFee), - }).MustCommit(db) - chain, _ := core.NewBlockChain(db, nil, params.TestChainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) + chain, _ := core.NewBlockChain(db, core.DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) bs, _ := core.GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), db, 1, nil, true) if _, err := chain.InsertChain(bs[:], nil); err != nil { @@ -1540,6 +1545,11 @@ func TestVerifyVote(t *testing.T) { } func TestKnownBlockReorg(t *testing.T) { + testKnowBlockReorg(t, rawdb.PathScheme) + testKnowBlockReorg(t, rawdb.HashScheme) +} + +func testKnowBlockReorg(t *testing.T, scheme string) { db := rawdb.NewMemoryDatabase() blsKeys := make([]blsCommon.SecretKey, 3) @@ -1584,9 +1594,10 @@ func TestKnownBlockReorg(t *testing.T) { }, } - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: &chainConfig, - }).MustCommit(db) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) mock := &mockContract{ validators: make(map[common.Address]mockValidator), @@ -1606,7 +1617,7 @@ func TestKnownBlockReorg(t *testing.T) { db: db, } - chain, _ := core.NewBlockChain(db, nil, &chainConfig, &v2, vm.Config{}, nil, nil) + chain, _ := core.NewBlockChain(db, core.DefaultCacheConfigWithScheme(scheme), gspec, nil, &v2, vm.Config{}, nil, nil) extraData := [consortiumCommon.ExtraVanity + consortiumCommon.ExtraSeal]byte{} blocks, _ := core.GenerateConsortiumChain( @@ -1807,6 +1818,11 @@ func TestKnownBlockReorg(t *testing.T) { } func TestUpgradeRoninTrustedOrg(t *testing.T) { + testUpgradeRoninTrustedOrg(t, rawdb.PathScheme) + testUpgradeRoninTrustedOrg(t, rawdb.HashScheme) +} + +func testUpgradeRoninTrustedOrg(t *testing.T, scheme string) { db := rawdb.NewMemoryDatabase() blsSecretKey, err := blst.RandKey() if err != nil { @@ -1835,13 +1851,14 @@ func TestUpgradeRoninTrustedOrg(t *testing.T) { }, } - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: &chainConfig, Alloc: core.GenesisAlloc{ // Make proxy address non-empty to avoid being deleted common.Address{0x10}: core.GenesisAccount{Balance: common.Big1}, }, - }).MustCommit(db) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) mock := &mockContract{ validators: map[common.Address]mockValidator{ @@ -1863,7 +1880,7 @@ func TestUpgradeRoninTrustedOrg(t *testing.T) { }, } - chain, _ := core.NewBlockChain(db, nil, &chainConfig, &v2, vm.Config{}, nil, nil) + chain, _ := core.NewBlockChain(db, core.DefaultCacheConfigWithScheme(scheme), gspec, nil, &v2, vm.Config{}, nil, nil) extraData := [consortiumCommon.ExtraVanity + consortiumCommon.ExtraSeal]byte{} parent := genesis @@ -1933,6 +1950,11 @@ func TestUpgradeRoninTrustedOrg(t *testing.T) { } func TestUpgradeAxieProxyCode(t *testing.T) { + testUpgradeAxieProxyCode(t, rawdb.PathScheme) + testUpgradeAxieProxyCode(t, rawdb.HashScheme) +} + +func testUpgradeAxieProxyCode(t *testing.T, scheme string) { secretKey, err := crypto.GenerateKey() if err != nil { t.Fatal(err) @@ -1982,9 +2004,10 @@ func TestUpgradeAxieProxyCode(t *testing.T) { Code: code, }, } - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: chainConfig, - }).MustCommit(db) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) mock := &mockTrippContract{ checkpointValidators: []validatorWithBlsWeight{ validatorWithBlsWeight{ @@ -2011,7 +2034,7 @@ func TestUpgradeAxieProxyCode(t *testing.T) { testTrippEffective: true, } - chain, _ := core.NewBlockChain(db, nil, chainConfig, v2, vm.Config{}, nil, nil) + chain, _ := core.NewBlockChain(db, core.DefaultCacheConfigWithScheme(scheme), gspec, nil, v2, vm.Config{}, nil, nil) extraData := &finality.HeaderExtraData{} parent := genesis @@ -2072,6 +2095,11 @@ func TestUpgradeAxieProxyCode(t *testing.T) { } func TestSystemTransactionOrder(t *testing.T) { + testSystemTransactionOrder(t, rawdb.PathScheme) + testSystemTransactionOrder(t, rawdb.HashScheme) +} + +func testSystemTransactionOrder(t *testing.T, scheme string) { db := rawdb.NewMemoryDatabase() blsSecretKey, err := blst.RandKey() if err != nil { @@ -2104,13 +2132,14 @@ func TestSystemTransactionOrder(t *testing.T) { }, } - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: &chainConfig, Alloc: core.GenesisAlloc{ // Make proxy address non-empty to avoid being deleted common.Address{0x10}: core.GenesisAccount{Balance: common.Big1}, }, - }).MustCommit(db) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) mock := &mockContract{ validators: map[common.Address]mockValidator{ @@ -2132,7 +2161,7 @@ func TestSystemTransactionOrder(t *testing.T) { }, } - chain, _ := core.NewBlockChain(db, nil, &chainConfig, &v2, vm.Config{}, nil, nil) + chain, _ := core.NewBlockChain(db, core.DefaultCacheConfigWithScheme(scheme), gspec, nil, &v2, vm.Config{}, nil, nil) extraData := [consortiumCommon.ExtraVanity + consortiumCommon.ExtraSeal]byte{} signer := types.NewEIP155Signer(big.NewInt(2021)) @@ -2213,6 +2242,11 @@ func TestSystemTransactionOrder(t *testing.T) { } func TestIsPeriodBlock(t *testing.T) { + testIsPeriodBlock(t, rawdb.PathScheme) + testIsPeriodBlock(t, rawdb.HashScheme) +} + +func testIsPeriodBlock(t *testing.T, scheme string) { const NUM_OF_VALIDATORS = 21 dateInSeconds := uint64(86400) now := uint64(time.Now().Unix()) @@ -2229,12 +2263,13 @@ func TestIsPeriodBlock(t *testing.T) { RoninValidatorSet: common.HexToAddress("0xaa"), }, } - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: &chainConfig, BaseFee: big.NewInt(params.InitialBaseFee), Timestamp: midnight, // genesis at day 1 - }).MustCommit(db) - chain, _ := core.NewBlockChain(db, nil, &chainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) + chain, _ := core.NewBlockChain(db, core.DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) // create chain of up to 399 blocks, all of them are not period block bs, _ := core.GenerateChain(&chainConfig, genesis, ethash.NewFaker(), db, 399, nil, true) // create chain of up to 399 blocks if _, err := chain.InsertChain(bs[:], nil); err != nil { @@ -2286,7 +2321,11 @@ func TestIsPeriodBlock(t *testing.T) { block, _ := core.GenerateChain(&chainConfig, bs[len(bs)-1], ethash.NewFaker(), db, 1, callback, true) bs = append(bs, block...) } - if _, err := chain.InsertChain(bs[:], nil); err != nil { + // Only the new blocks are inserted here + // For path scheme, the number of db diff layers corresponding to blocks are limited to 128 + // So just the newest 128 blocks can be retrieved from the db + // Therefore, the handling of the inserted blocks can result in error since the older blocks can not be retrieved for checking + if _, err := chain.InsertChain(bs[399:], nil); err != nil { panic(err) } @@ -2406,11 +2445,13 @@ func createNewChain( chainConfig *params.ChainConfig, validatorAddr common.Address, blsSecretKey blsCommon.SecretKey, + scheme string, ) (*types.Block, *core.BlockChain, ethdb.Database, *Consortium) { db := rawdb.NewMemoryDatabase() - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: chainConfig, - }).MustCommit(db) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) mock := &mockContract{ validators: map[common.Address]mockValidator{ @@ -2436,11 +2477,16 @@ func createNewChain( }, } - chain, _ := core.NewBlockChain(db, nil, chainConfig, v2, vm.Config{}, nil, nil) + chain, _ := core.NewBlockChain(db, core.DefaultCacheConfigWithScheme(scheme), gspec, nil, v2, vm.Config{}, nil, nil) return genesis, chain, db, v2 } func TestIsPeriodVenoki(t *testing.T) { + testIsPeriodVenoki(t, rawdb.PathScheme) + testIsPeriodVenoki(t, rawdb.HashScheme) +} + +func testIsPeriodVenoki(t *testing.T, scheme string) { blsSecretKey, err := blst.RandKey() if err != nil { t.Fatal(err) @@ -2473,7 +2519,7 @@ func TestIsPeriodVenoki(t *testing.T) { }, } - genesis, chain, db, v2 := createNewChain(&chainConfig, validatorAddr, blsSecretKey) + genesis, chain, db, v2 := createNewChain(&chainConfig, validatorAddr, blsSecretKey, scheme) block := generateChain(t, genesis, &chainConfig, chain, v2, db, secretKey, validatorAddr, blsSecretKey, false) // Case 1 @@ -2500,7 +2546,7 @@ func TestIsPeriodVenoki(t *testing.T) { // previous epoch passes 0:00 // The last block of previous epoch does not pass 0:00 chainConfig.VenokiBlock = big.NewInt(200) - genesis, chain, db, v2 = createNewChain(&chainConfig, validatorAddr, blsSecretKey) + genesis, chain, db, v2 = createNewChain(&chainConfig, validatorAddr, blsSecretKey, scheme) block = generateChain(t, genesis, &chainConfig, chain, v2, db, secretKey, validatorAddr, blsSecretKey, false) isPeriodBlock, err = v2.IsPeriodBlock(chain, block[0].Header(), nil) if err != nil { @@ -2520,7 +2566,7 @@ func TestIsPeriodVenoki(t *testing.T) { // Case 3 // The last block of previous epoch passes 0:00 - genesis, chain, db, v2 = createNewChain(&chainConfig, validatorAddr, blsSecretKey) + genesis, chain, db, v2 = createNewChain(&chainConfig, validatorAddr, blsSecretKey, scheme) block = generateChain(t, genesis, &chainConfig, chain, v2, db, secretKey, validatorAddr, blsSecretKey, true) isPeriodBlock, err = v2.IsPeriodBlock(chain, block[0].Header(), nil) if err != nil { @@ -2540,6 +2586,12 @@ func TestIsPeriodVenoki(t *testing.T) { } func TestIsTrippEffective(t *testing.T) { + testIsTrippEffective(t, rawdb.HashScheme) + testIsTrippEffective(t, rawdb.PathScheme) + +} + +func testIsTrippEffective(t *testing.T, scheme string) { now := uint64(time.Now().Unix()) midnight := uint64(now / dayInSeconds * dayInSeconds) db := rawdb.NewMemoryDatabase() @@ -2554,12 +2606,13 @@ func TestIsTrippEffective(t *testing.T) { }, TrippPeriod: new(big.Int).SetUint64(now / dayInSeconds), } - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: &chainConfig, BaseFee: big.NewInt(params.InitialBaseFee), Timestamp: midnight, // genesis at day 1 - }).MustCommit(db) - chain, _ := core.NewBlockChain(db, nil, &chainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) + chain, _ := core.NewBlockChain(db, core.DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) // create chain of up to 399 blocks, all of them are not Tripp effective bs, _ := core.GenerateChain(&chainConfig, genesis, ethash.NewFaker(), db, 399, nil, true) if _, err := chain.InsertChain(bs[:], nil); err != nil { @@ -2621,7 +2674,11 @@ func TestIsTrippEffective(t *testing.T) { block, _ := core.GenerateChain(&chainConfig, bs[len(bs)-1], ethash.NewFaker(), db, 1, callback, true) bs = append(bs, block...) } - if _, err := chain.InsertChain(bs[:], nil); err != nil { + // Only the new blocks are inserted here + // For path scheme, the number of db diff layers corresponding to blocks are limited to 128 + // So just the newest 128 blocks can be retrieved from the db + // Therefore, the handling of the inserted blocks can result in error since the older blocks can not be retrieved for checking + if _, err := chain.InsertChain(bs[399:], nil); err != nil { panic(err) } diff --git a/core/bench_test.go b/core/bench_test.go index 600fa2e4be..cd0b3fbaba 100644 --- a/core/bench_test.go +++ b/core/bench_test.go @@ -32,6 +32,7 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) func BenchmarkInsertChain_empty_memdb(b *testing.B) { @@ -196,12 +197,12 @@ func benchInsertChain(b *testing.B, disk bool, gen func(int, *BlockGen)) { Config: params.TestChainConfig, Alloc: GenesisAlloc{benchRootAddr: {Balance: benchRootFunds}}, } - genesis := gspec.MustCommit(db) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) chain, _ := GenerateChain(gspec.Config, genesis, ethash.NewFaker(), db, b.N, gen, true) // Time the insertion of the new chain. // State and blocks are stored in the same DB. - chainman, _ := NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + chainman, _ := NewBlockChain(db, nil, &gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer chainman.Stop() b.ReportAllocs() b.ResetTimer() @@ -316,7 +317,10 @@ func benchReadChain(b *testing.B, full bool, count uint64) { if err != nil { b.Fatalf("error opening database at %v: %v", dir, err) } - chain, err := NewBlockChain(db, &cacheConfig, params.TestChainConfig, ethash.NewFaker(), vm.Config{}, nil, nil) + gspec := &Genesis{ + Config: params.TestChainConfig, + } + chain, err := NewBlockChain(db, &cacheConfig, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) if err != nil { b.Fatalf("error creating chain: %v", err) } diff --git a/core/block_validator_test.go b/core/block_validator_test.go index dcfe9af5d9..27138f5df2 100644 --- a/core/block_validator_test.go +++ b/core/block_validator_test.go @@ -26,15 +26,20 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) -// Tests that simple header verification works, for both good and bad blocks. func TestHeaderVerification(t *testing.T) { + testHeaderVerification(t, rawdb.HashScheme) + testHeaderVerification(t, rawdb.PathScheme) +} + +func testHeaderVerification(t *testing.T, scheme string) { // Create a simple chain to verify var ( testdb = rawdb.NewMemoryDatabase() gspec = &Genesis{Config: params.TestChainConfig} - genesis = gspec.MustCommit(testdb) + genesis = gspec.MustCommit(testdb, trie.NewDatabase(testdb, newDbConfig(scheme))) blocks, _ = GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), testdb, 8, nil, true) ) headers := make([]*types.Header, len(blocks)) @@ -42,7 +47,7 @@ func TestHeaderVerification(t *testing.T) { headers[i] = block.Header() } // Run the header checker for blocks one-by-one, checking for both valid and invalid nonces - chain, _ := NewBlockChain(testdb, nil, params.TestChainConfig, ethash.NewFaker(), vm.Config{}, nil, nil) + chain, _ := NewBlockChain(testdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer chain.Stop() for i := 0; i < len(blocks); i++ { @@ -86,7 +91,7 @@ func testHeaderConcurrentVerification(t *testing.T, threads int) { var ( testdb = rawdb.NewMemoryDatabase() gspec = &Genesis{Config: params.TestChainConfig} - genesis = gspec.MustCommit(testdb) + genesis = gspec.MustCommit(testdb, trie.NewDatabase(testdb, trie.HashDefaults)) blocks, _ = GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), testdb, 8, nil, true) ) headers := make([]*types.Header, len(blocks)) @@ -106,11 +111,11 @@ func testHeaderConcurrentVerification(t *testing.T, threads int) { var results <-chan error if valid { - chain, _ := NewBlockChain(testdb, nil, params.TestChainConfig, ethash.NewFaker(), vm.Config{}, nil, nil) + chain, _ := NewBlockChain(testdb, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) _, results = chain.engine.VerifyHeaders(chain, headers, seals) chain.Stop() } else { - chain, _ := NewBlockChain(testdb, nil, params.TestChainConfig, ethash.NewFakeFailer(uint64(len(headers)-1)), vm.Config{}, nil, nil) + chain, _ := NewBlockChain(testdb, nil, gspec, nil, ethash.NewFakeFailer(uint64(len(headers)-1)), vm.Config{}, nil, nil) _, results = chain.engine.VerifyHeaders(chain, headers, seals) chain.Stop() } @@ -158,7 +163,7 @@ func testHeaderConcurrentAbortion(t *testing.T, threads int) { var ( testdb = rawdb.NewMemoryDatabase() gspec = &Genesis{Config: params.TestChainConfig} - genesis = gspec.MustCommit(testdb) + genesis = gspec.MustCommit(testdb, trie.NewDatabase(testdb, trie.HashDefaults)) blocks, _ = GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), testdb, 1024, nil, true) ) headers := make([]*types.Header, len(blocks)) @@ -173,7 +178,7 @@ func testHeaderConcurrentAbortion(t *testing.T, threads int) { defer runtime.GOMAXPROCS(old) // Start the verifications and immediately abort - chain, _ := NewBlockChain(testdb, nil, params.TestChainConfig, ethash.NewFakeDelayer(time.Millisecond), vm.Config{}, nil, nil) + chain, _ := NewBlockChain(testdb, nil, gspec, nil, ethash.NewFakeDelayer(time.Millisecond), vm.Config{}, nil, nil) defer chain.Stop() abort, results := chain.engine.VerifyHeaders(chain, headers, seals) diff --git a/core/blockchain.go b/core/blockchain.go index e8688443be..5881713f65 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -46,6 +46,8 @@ import ( "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" lru "github.com/hashicorp/golang-lru/v2" ) @@ -68,6 +70,8 @@ var ( snapshotStorageReadTimer = metrics.NewRegisteredTimer("chain/snapshot/storage/reads", nil) snapshotCommitTimer = metrics.NewRegisteredTimer("chain/snapshot/commits", nil) + triedbCommitTimer = metrics.NewRegisteredTimer("chain/triedb/commits", nil) + blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", nil) blockValidationTimer = metrics.NewRegisteredTimer("chain/validation", nil) blockExecutionTimer = metrics.NewRegisteredTimer("chain/execution", nil) @@ -126,12 +130,10 @@ const ( BlockChainVersion uint64 = 8 ) -// CacheConfig contains the configuration values for the trie caching/pruning +// CacheConfig contains the configuration values for the trie database // that's resident in a blockchain. type CacheConfig struct { TrieCleanLimit int // Memory allowance (MB) to use for caching trie nodes in memory - TrieCleanJournal string // Disk journal for saving clean cache entries. - TrieCleanRejournal time.Duration // Time interval to dump clean cache to disk periodically TrieCleanNoPrefetch bool // Whether to disable heuristic state prefetching for followup blocks TrieDirtyLimit int // Memory limit (MB) at which to start flushing dirty trie nodes to disk TrieDirtyDisabled bool // Whether to disable trie write caching and GC altogether (archive node) @@ -139,10 +141,30 @@ type CacheConfig struct { SnapshotLimit int // Memory allowance (MB) to use for caching snapshot entries in memory Preimages bool // Whether to store preimage of trie key to the disk TriesInMemory int // The number of tries is kept in memory before pruning + StateHistory uint64 // Number of blocks from head whose state histories are reserved. + StateScheme string // Scheme used to store ethereum states and merkle tree nodes on top SnapshotWait bool // Wait for snapshot construction on startup. TODO(karalabe): This is a dirty hack for testing, nuke it } +// triedbConfig derives the configures for trie database. +func (c *CacheConfig) triedbConfig() *trie.Config { + config := &trie.Config{Preimages: c.Preimages} + if c.StateScheme == rawdb.HashScheme { + config.HashDB = &hashdb.Config{ + CleanCacheSize: c.TrieCleanLimit * 1024 * 1024, + } + } + if c.StateScheme == rawdb.PathScheme { + config.PathDB = &pathdb.Config{ + StateHistory: c.StateHistory, + CleanCacheSize: c.TrieCleanLimit * 1024 * 1024, + DirtyCacheSize: c.TrieDirtyLimit * 1024 * 1024, + } + } + return config +} + // defaultCacheConfig are the default caching values if none are specified by the // user (also used during testing). var defaultCacheConfig = &CacheConfig{ @@ -152,6 +174,15 @@ var defaultCacheConfig = &CacheConfig{ SnapshotLimit: 256, SnapshotWait: true, TriesInMemory: DefaultTriesInMemory, + StateScheme: rawdb.HashScheme, +} + +// DefaultCacheConfigWithScheme returns a deep copied default cache config with +// a provided trie node scheme. +func DefaultCacheConfigWithScheme(scheme string) *CacheConfig { + config := *defaultCacheConfig + config.StateScheme = scheme + return &config } // BlockChain represents the canonical chain given a database with a genesis @@ -176,6 +207,7 @@ type BlockChain struct { snaps *snapshot.Tree // Snapshot tree for fast trie leaf access triegc *prque.Prque // Priority queue mapping block numbers to tries to gc gcproc time.Duration // Accumulates canonical block processing for trie dumping + triedb *trie.Database // The database handler for maintaining trie nodes. // txLookupLimit is the maximum number of blocks from head whose tx indices // are reserved: @@ -243,7 +275,7 @@ type futureBlock struct { // NewBlockChain returns a fully initialised block chain using information // available in the database. It initialises the default Ethereum Validator and // Processor. -func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *params.ChainConfig, engine consensus.Engine, vmConfig vm.Config, shouldPreserve func(block *types.Block) bool, txLookupLimit *uint64) (*BlockChain, error) { +func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis, overrideArrowGlacier *big.Int, engine consensus.Engine, vmConfig vm.Config, shouldPreserve func(block *types.Block) bool, txLookupLimit *uint64) (*BlockChain, error) { if cacheConfig == nil { cacheConfig = defaultCacheConfig } @@ -261,16 +293,24 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par blobSidecarsCache, _ := lru.New[common.Hash, types.BlobSidecars](blobSidecarsCacheLimit) + // Open trie database with provided config + triedb := trie.NewDatabase(db, cacheConfig.triedbConfig()) + // Setup the genesis block, commit the provided genesis specification + // to database if the genesis block is not present yet, or load the + // stored one from database. + chainConfig, genesisHash, genesisErr := SetupGenesisBlockWithOverride(db, triedb, genesis, overrideArrowGlacier, false) + if _, ok := genesisErr.(*params.ConfigCompatError); genesisErr != nil && !ok { + return nil, genesisErr + } + log.Info("Initialised chain configuration", "config", chainConfig) + bc := &BlockChain{ - chainConfig: chainConfig, - cacheConfig: cacheConfig, - db: db, - triegc: prque.New(nil), - stateCache: state.NewDatabaseWithConfig(db, &trie.Config{ - Cache: cacheConfig.TrieCleanLimit, - Journal: cacheConfig.TrieCleanJournal, - Preimages: cacheConfig.Preimages, - }), + chainConfig: chainConfig, + cacheConfig: cacheConfig, + db: db, + triedb: triedb, + triegc: prque.New(nil), + stateCache: state.NewDatabaseWithNodeDB(db, triedb), quit: make(chan struct{}), chainmu: syncx.NewClosableMutex(), shouldPreserve: shouldPreserve, @@ -294,6 +334,7 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par bc.evmHook = TestnetHook{} } + bc.stateCache = state.NewDatabaseWithNodeDB(bc.db, bc.triedb) bc.validator = NewBlockValidator(chainConfig, bc, engine) bc.prefetcher = newStatePrefetcher(chainConfig, bc, engine) bc.processor = NewStateProcessor(chainConfig, bc, engine) @@ -330,29 +371,38 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par // Make sure the state associated with the block is available head := bc.CurrentBlock() - if _, err := state.New(head.Root(), bc.stateCache, bc.snaps); err != nil { - // Head state is missing, before the state recovery, find out the - // disk layer point of snapshot(if it's enabled). Make sure the - // rewound point is lower than disk layer. - var diskRoot common.Hash - if bc.cacheConfig.SnapshotLimit > 0 { - diskRoot = rawdb.ReadSnapshotRoot(bc.db) - } - if diskRoot != (common.Hash{}) { - log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash(), "snaproot", diskRoot) - - snapDisk, err := bc.setHeadBeyondRoot(head.NumberU64(), diskRoot, true) - if err != nil { - return nil, err - } - // Chain rewound, persist old snapshot number to indicate recovery procedure - if snapDisk != 0 { - rawdb.WriteSnapshotRecoveryNumber(bc.db, snapDisk) - } + if !bc.HasState(head.Root()) { + if head.NumberU64() == 0 { + // The genesis state is missing, which is only possible in the path-based + // scheme. This situation occurs when the initial state sync is not finished + // yet, or the chain head is rewound below the pivot point. In both scenario, + // there is no possible recovery approach except for rerunning a snap sync. + // Do nothing here until the state syncer picks it up. + log.Info("Genesis state is missing, wait state sync") } else { - log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash()) - if _, err := bc.setHeadBeyondRoot(head.NumberU64(), common.Hash{}, true); err != nil { - return nil, err + // Head state is missing, before the state recovery, find out the + // disk layer point of snapshot(if it's enabled). Make sure the + // rewound point is lower than disk layer. + var diskRoot common.Hash + if bc.cacheConfig.SnapshotLimit > 0 { + diskRoot = rawdb.ReadSnapshotRoot(bc.db) + } + if diskRoot != (common.Hash{}) { + log.Warn("Head state missing, repairing", "number", head.Number, "hash", head.Hash(), "snaproot", diskRoot) + + snapDisk, err := bc.setHeadBeyondRoot(head.NumberU64(), diskRoot, true) + if err != nil { + return nil, err + } + // Chain rewound, persist old snapshot number to indicate recovery procedure + if snapDisk != 0 { + rawdb.WriteSnapshotRecoveryNumber(bc.db, snapDisk) + } + } else { + log.Warn("Head state missing, repairing", "number", head.Number, "hash", head.Hash()) + if _, err := bc.setHeadBeyondRoot(head.NumberU64(), common.Hash{}, true); err != nil { + return nil, err + } } } } @@ -414,11 +464,12 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par var recover bool head := bc.CurrentBlock() - if layer := rawdb.ReadSnapshotRecoveryNumber(bc.db); layer != nil && *layer > head.NumberU64() { + // If we rewind the chain state to disk layer, then in this case recovery mode should be enabled. + if layer := rawdb.ReadSnapshotRecoveryNumber(bc.db); layer != nil && *layer >= head.NumberU64() { log.Warn("Enabling snapshot recovery", "chainhead", head.NumberU64(), "diskbase", *layer) recover = true } - bc.snaps, _ = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, head.Root(), !bc.cacheConfig.SnapshotWait, true, recover) + bc.snaps, _ = snapshot.New(bc.db, bc.triedb, bc.cacheConfig.SnapshotLimit, head.Root(), !bc.cacheConfig.SnapshotWait, true, recover) } // Start future block processor. @@ -433,23 +484,14 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par go bc.maintainTxIndex(txIndexBlock) } - // If periodic cache journal is required, spin it up. - if bc.cacheConfig.TrieCleanRejournal > 0 { - if bc.cacheConfig.TrieCleanRejournal < time.Minute { - log.Warn("Sanitizing invalid trie cache journal time", "provided", bc.cacheConfig.TrieCleanRejournal, "updated", time.Minute) - bc.cacheConfig.TrieCleanRejournal = time.Minute - } - triedb := bc.stateCache.TrieDB() - bc.wg.Add(1) - go func() { - defer bc.wg.Done() - triedb.SaveCachePeriodically(bc.cacheConfig.TrieCleanJournal, bc.cacheConfig.TrieCleanRejournal, bc.quit) - }() - } - // load the latest dirty accounts stored from last stop to cache bc.loadLatestDirtyAccounts() - + // Rewind the chain in case of an incompatible config upgrade. + if compat, ok := genesisErr.(*params.ConfigCompatError); ok { + log.Warn("Rewinding chain to upgrade configuration", "err", compat) + bc.SetHead(compat.RewindTo) + rawdb.WriteChainConfig(db, genesisHash, chainConfig) + } return bc, nil } @@ -674,7 +716,9 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, root common.Hash, repair bo if root != (common.Hash{}) && !beyondRoot && newHeadBlock.Root() == root { beyondRoot, rootNumber = true, newHeadBlock.NumberU64() } - if _, err := state.New(newHeadBlock.Root(), bc.stateCache, bc.snaps); err != nil { + + // In pbss, if the state is missing, we can possibly recover it from history db + if !bc.HasState(newHeadBlock.Root()) && !bc.stateRecoverable(newHeadBlock.Root()) { log.Trace("Block state missing, rewinding further", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) if pivot == nil || newHeadBlock.NumberU64() > *pivot { parent := bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1) @@ -690,7 +734,14 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, root common.Hash, repair bo } } if beyondRoot || newHeadBlock.NumberU64() == 0 { - log.Debug("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) + if !bc.HasState(newHeadBlock.Root()) && bc.stateRecoverable(newHeadBlock.Root()) { + // Rewind to a block with recoverable state. If the state is + // missing, run the state recovery here. + if err := bc.triedb.Recover(newHeadBlock.Root()); err != nil { + log.Crit("Failed to rollback state", "err", err) // Shouldn't happen + } + log.Debug("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) + } break } log.Debug("Skipping block with threshold state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "root", newHeadBlock.Root()) @@ -705,6 +756,15 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, root common.Hash, repair bo // to low, so it's safe the update in-memory markers directly. bc.currentBlock.Store(newHeadBlock) headBlockGauge.Update(int64(newHeadBlock.NumberU64())) + + // The head state is missing, which is only possible in the path-based + // scheme. This situation occurs when the chain head is rewound below + // the pivot point. In this scenario, there is no possible recovery + // approach except for rerunning a snap sync. Do nothing here until the + // state syncer picks it up. + if !bc.HasState(newHeadBlock.Root()) { + log.Info("Chain is stateless, wait state sync", "number", newHeadBlock.Number(), "hash", newHeadBlock.Hash()) + } } // Rewind the fast block in a simpleton way to the target head if currentFastBlock := bc.CurrentFastBlock(); currentFastBlock != nil && header.Number.Uint64() < currentFastBlock.NumberU64() { @@ -740,7 +800,7 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, root common.Hash, repair bo if num+1 <= frozen { // Truncate all relative data(header, total difficulty, body, receipt // and canonical hash) from ancient store. - if err := bc.db.TruncateAncients(num); err != nil { + if _, err := bc.db.TruncateHead(num); err != nil { log.Crit("Failed to truncate ancient data", "number", num, "err", err) } // Remove the hash <-> number mapping from the active store. @@ -785,10 +845,16 @@ func (bc *BlockChain) FastSyncCommitHead(hash common.Hash) error { if block == nil { return fmt.Errorf("non existent block [%x..]", hash[:4]) } - if _, err := trie.NewSecure(block.Root(), bc.stateCache.TrieDB()); err != nil { - return err + // Reset the trie database with the fresh fast synced state. + root := block.Root() + if bc.triedb.Scheme() == rawdb.PathScheme { + if err := bc.triedb.Enable(root); err != nil { + return err + } + } + if !bc.HasState(root) { + return fmt.Errorf("non existent state [%x..]", root[:4]) } - // If all checks out, manually set the head block. if !bc.chainmu.TryLock() { return errChainStopped @@ -800,7 +866,7 @@ func (bc *BlockChain) FastSyncCommitHead(hash common.Hash) error { // Destroy any existing state snapshot and regenerate it in the background, // also resuming the normal maintenance of any previously paused snapshot. if bc.snaps != nil { - bc.snaps.Rebuild(block.Root()) + bc.snaps.Rebuild(root) } log.Info("Committed new head block", "number", block.Number(), "hash", hash) return nil @@ -965,43 +1031,47 @@ func (bc *BlockChain) Stop() { log.Error("Failed to journal state snapshot", "err", err) } } - - // Ensure the state of a recent block is also stored to disk before exiting. - // We're writing three different states to catch different restart scenarios: - // - HEAD: So we don't need to reprocess any blocks in the general case - // - HEAD-1: So we don't do large reorgs if our HEAD becomes an uncle - // - HEAD-127: So we have a hard limit on the number of blocks reexecuted - if !bc.cacheConfig.TrieDirtyDisabled { - triedb := bc.stateCache.TrieDB() - - for _, offset := range []uint64{0, 1, uint64(bc.cacheConfig.TriesInMemory) - 1} { - if number := bc.CurrentBlock().NumberU64(); number > offset { - recent := bc.GetBlockByNumber(number - offset) - - log.Info("Writing cached state to disk", "block", recent.Number(), "hash", recent.Hash(), "root", recent.Root()) - if err := triedb.Commit(recent.Root(), true, nil); err != nil { + if bc.triedb.Scheme() == rawdb.PathScheme { + // Ensure that the in-memory trie nodes are journaled to disk properly. + if err := bc.triedb.Journal(bc.CurrentBlock().Root()); err != nil { + log.Info("Failed to journal in-memory trie nodes", "err", err) + } + } else { + // Ensure the state of a recent block is also stored to disk before exiting. + // We're writing three different states to catch different restart scenarios: + // - HEAD: So we don't need to reprocess any blocks in the general case + // - HEAD-1: So we don't do large reorgs if our HEAD becomes an uncle + // - HEAD-127: So we have a hard limit on the number of blocks reexecuted + if !bc.cacheConfig.TrieDirtyDisabled { + triedb := bc.triedb + + for _, offset := range []uint64{0, 1, uint64(bc.cacheConfig.TriesInMemory) - 1} { + if number := bc.CurrentBlock().NumberU64(); number > offset { + recent := bc.GetBlockByNumber(number - offset) + + log.Info("Writing cached state to disk", "block", recent.Number(), "hash", recent.Hash(), "root", recent.Root()) + if err := triedb.Commit(recent.Root(), true); err != nil { + log.Error("Failed to commit recent state trie", "err", err) + } + } + } + if snapBase != (common.Hash{}) { + log.Info("Writing snapshot state to disk", "root", snapBase) + if err := triedb.Commit(snapBase, true); err != nil { log.Error("Failed to commit recent state trie", "err", err) } } - } - if snapBase != (common.Hash{}) { - log.Info("Writing snapshot state to disk", "root", snapBase) - if err := triedb.Commit(snapBase, true, nil); err != nil { - log.Error("Failed to commit recent state trie", "err", err) + for !bc.triegc.Empty() { + triedb.Dereference(bc.triegc.PopItem().(common.Hash)) + } + if size, _ := triedb.Size(); size != 0 { + log.Error("Dangling trie nodes after full cleanup") } - } - for !bc.triegc.Empty() { - triedb.Dereference(bc.triegc.PopItem().(common.Hash)) - } - if size, _ := triedb.Size(); size != 0 { - log.Error("Dangling trie nodes after full cleanup") } } - // Ensure all live cached entries be saved into disk, so that we can skip - // cache warmup when node restarts. - if bc.cacheConfig.TrieCleanJournal != "" { - triedb := bc.stateCache.TrieDB() - triedb.SaveCache(bc.cacheConfig.TrieCleanJournal) + // Flush the collected preimages to disk + if err := bc.triedb.Close(); err != nil { + log.Error("Failed to close trie db", "err", err) } log.Info("Blockchain stopped") } @@ -1177,7 +1247,7 @@ func (bc *BlockChain) InsertReceiptChain(blockChain types.Blocks, receiptChain [ // The tx index data could not be written. // Roll back the ancient store update. fastBlock := bc.CurrentFastBlock().NumberU64() - if err := bc.db.TruncateAncients(fastBlock + 1); err != nil { + if _, err := bc.db.TruncateHead(fastBlock + 1); err != nil { log.Error("Can't truncate ancient store after failed insert", "err", err) } return 0, err @@ -1193,7 +1263,7 @@ func (bc *BlockChain) InsertReceiptChain(blockChain types.Blocks, receiptChain [ if !updateHead(blockChain[len(blockChain)-1]) { // We end up here if the header chain has reorg'ed, and the blocks/receipts // don't match the canonical chain. - if err := bc.db.TruncateAncients(previousFastBlock + 1); err != nil { + if _, err := bc.db.TruncateHead(previousFastBlock + 1); err != nil { log.Error("Can't truncate ancient store after failed insert", "err", err) } return 0, errSideChainReceipts @@ -1537,67 +1607,69 @@ func (bc *BlockChain) writeBlockWithState( } // Commit all cached state changes into underlying memory database. dirtyAccounts := state.DirtyAccounts(block.Hash(), block.NumberU64()) - root, err := state.Commit(bc.chainConfig.IsEIP158(block.Number())) + root, err := state.Commit(block.NumberU64(), bc.chainConfig.IsEIP158(block.Number())) if err != nil { return NonStatTy, err } - triedb := bc.stateCache.TrieDB() - - // If we're running an archive node, always flush - if bc.cacheConfig.TrieDirtyDisabled { - if err := triedb.Commit(root, false, nil); err != nil { - return NonStatTy, err - } - } else { - // Full but not archive node, do proper garbage collection - triedb.Reference(root, common.Hash{}) // metadata reference to keep trie alive - bc.triegc.Push(root, -int64(block.NumberU64())) - - triesInMemory := uint64(bc.cacheConfig.TriesInMemory) - if current := block.NumberU64(); current > triesInMemory { - // If we exceeded our memory allowance, flush matured singleton nodes to disk - var ( - nodes, imgs = triedb.Size() - limit = common.StorageSize(bc.cacheConfig.TrieDirtyLimit) * 1024 * 1024 - ) - if nodes > limit || imgs > 4*1024*1024 { - triedb.Cap(limit - ethdb.IdealBatchSize) + // If node is running in path mode, skip explicit gc operation + // which is unnecessary in this mode. + if bc.triedb.Scheme() != rawdb.PathScheme { + // If we're running an archive node, always flush + if bc.cacheConfig.TrieDirtyDisabled { + if err := bc.triedb.Commit(root, false); err != nil { + return NonStatTy, err } - // Find the next state trie we need to commit - chosen := current - triesInMemory - - // If we exceeded out time allowance, flush an entire trie to disk - if bc.gcproc > bc.cacheConfig.TrieTimeLimit { - // If the header is missing (canonical chain behind), we're reorging a low - // diff sidechain. Suspend committing until this operation is completed. - header := bc.GetHeaderByNumber(chosen) - if header == nil { - log.Warn("Reorg in progress, trie commit postponed", "number", chosen) - } else { - // If we're exceeding limits but haven't reached a large enough memory gap, - // warn the user that the system is becoming unstable. - if chosen < lastWrite+triesInMemory && bc.gcproc >= 2*bc.cacheConfig.TrieTimeLimit { - log.Info( - "State in memory for too long, committing", - "time", bc.gcproc, - "allowance", bc.cacheConfig.TrieTimeLimit, - "optimum", float64(chosen-lastWrite)/float64(triesInMemory), - ) + } else { + // Full but not archive node, do proper garbage collection + bc.triedb.Reference(root, common.Hash{}) // metadata reference to keep trie alive + bc.triegc.Push(root, -int64(block.NumberU64())) + + triesInMemory := uint64(bc.cacheConfig.TriesInMemory) + if current := block.NumberU64(); current > triesInMemory { + // If we exceeded our memory allowance, flush matured singleton nodes to disk + var ( + nodes, imgs = bc.triedb.Size() + limit = common.StorageSize(bc.cacheConfig.TrieDirtyLimit) * 1024 * 1024 + ) + if nodes > limit || imgs > 4*1024*1024 { + bc.triedb.Cap(limit - ethdb.IdealBatchSize) + } + // Find the next state trie we need to commit + chosen := current - triesInMemory + + // If we exceeded out time allowance, flush an entire trie to disk + if bc.gcproc > bc.cacheConfig.TrieTimeLimit { + // If the header is missing (canonical chain behind), we're reorging a low + // diff sidechain. Suspend committing until this operation is completed. + header := bc.GetHeaderByNumber(chosen) + if header == nil { + log.Warn("Reorg in progress, trie commit postponed", "number", chosen) + } else { + // If we're exceeding limits but haven't reached a large enough memory gap, + // warn the user that the system is becoming unstable. + if chosen < lastWrite+triesInMemory && bc.gcproc >= 2*bc.cacheConfig.TrieTimeLimit { + log.Info( + "State in memory for too long, committing", + "time", bc.gcproc, + "allowance", bc.cacheConfig.TrieTimeLimit, + "optimum", float64(chosen-lastWrite)/float64(triesInMemory), + ) + } + // Flush an entire trie and restart the counters + bc.triedb.Commit(header.Root, true) + lastWrite = chosen + bc.gcproc = 0 } - // Flush an entire trie and restart the counters - triedb.Commit(header.Root, true, nil) - lastWrite = chosen - bc.gcproc = 0 } - } - // Garbage collect anything below our required write retention - for !bc.triegc.Empty() { - root, number := bc.triegc.Pop() - if uint64(-number) > chosen { - bc.triegc.Push(root, number) - break + // Garbage collect anything below our required write retention + for !bc.triegc.Empty() { + root, number := bc.triegc.Pop() + if uint64(-number) > chosen { + bc.triegc.Push(root, number) + break + } + bc.triedb.Dereference(root.(common.Hash)) } - triedb.Dereference(root.(common.Hash)) } } } @@ -1998,8 +2070,9 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool, sidecars accountCommitTimer.Update(statedb.AccountCommits) // Account commits are complete, we can mark them storageCommitTimer.Update(statedb.StorageCommits) // Storage commits are complete, we can mark them snapshotCommitTimer.Update(statedb.SnapshotCommits) // Snapshot commits are complete, we can mark them + triedbCommitTimer.Update(statedb.TrieDBCommits) // Triedb commits are complete, we can mark them - blockWriteTimer.Update(time.Since(substart) - statedb.AccountCommits - statedb.StorageCommits - statedb.SnapshotCommits) + blockWriteTimer.Update(time.Since(substart) - statedb.AccountCommits - statedb.StorageCommits - statedb.SnapshotCommits - statedb.TrieDBCommits) blockInsertTimer.UpdateSince(start) blockTxsGauge.Update(int64(len(block.Transactions()))) blockGasUsedGauge.Update(int64(block.GasUsed())) @@ -2033,7 +2106,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool, sidecars stats.processed++ stats.usedGas += usedGas - dirty, _ := bc.stateCache.TrieDB().Size() + dirty, _ := bc.triedb.Size() stats.report(chain, it.index, dirty) } @@ -2147,6 +2220,13 @@ func (bc *BlockChain) insertSideChain(block *types.Block, it *insertIterator, si ) parent := it.previous() for parent != nil && !bc.HasState(parent.Root) { + // If this is in pathdb and the state is in the history, recover it + if bc.stateRecoverable(parent.Root) { + if err := bc.triedb.Recover(parent.Root); err != nil { + return 0, err + } + break + } hashes = append(hashes, parent.Hash()) numbers = append(numbers, parent.Number.Uint64()) diff --git a/core/blockchain_reader.go b/core/blockchain_reader.go index ff503f91ec..f9e7621d12 100644 --- a/core/blockchain_reader.go +++ b/core/blockchain_reader.go @@ -29,6 +29,7 @@ import ( "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" ) // CurrentHeader retrieves the current head header of the canonical chain. The @@ -319,10 +320,21 @@ func (bc *BlockChain) HasBlockAndState(hash common.Hash, number uint64) bool { return bc.HasState(block.Root()) } -// TrieNode retrieves a blob of data associated with a trie node -// either from ephemeral in-memory cache, or from persistent storage. -func (bc *BlockChain) TrieNode(hash common.Hash) ([]byte, error) { - return bc.stateCache.TrieDB().Node(hash) +// stateRecoverable checks if the specified state is recoverable. +// Note, this function assumes the state is not present, because +// state is not treated as recoverable if it's available, thus +// false will be returned in this case. +func (bc *BlockChain) stateRecoverable(root common.Hash) bool { + if bc.triedb.Scheme() == rawdb.HashScheme { + return false + } + result, _ := bc.triedb.Recoverable(root) + return result +} + +// TrieDB retrieves the low level trie database used for data storage. +func (bc *BlockChain) TrieDB() *trie.Database { + return bc.triedb } // ContractCode retrieves a blob of data associated with a contract hash diff --git a/core/blockchain_repair_test.go b/core/blockchain_repair_test.go index d5e092a472..6d28624c03 100644 --- a/core/blockchain_repair_test.go +++ b/core/blockchain_repair_test.go @@ -24,6 +24,7 @@ import ( "io/ioutil" "math/big" "os" + "path" "testing" "time" @@ -33,6 +34,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) // Tests a recovery for a short canonical chain where a recent block was already @@ -1751,12 +1753,18 @@ func testLongReorgedFastSyncingDeepRepair(t *testing.T, snapshots bool) { } func testRepair(t *testing.T, tt *rewindTest, snapshots bool) { + testRepairWithScheme(t, tt, snapshots, rawdb.PathScheme) + testRepairWithScheme(t, tt, snapshots, rawdb.HashScheme) +} + +func testRepairWithScheme(t *testing.T, tt *rewindTest, snapshots bool, scheme string) { // It's hard to follow the test case, visualize the input //log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) // fmt.Println(tt.dump(true)) // Create a temporary persistent database datadir, err := ioutil.TempDir("", "") + ancient := path.Join(datadir, "ancient") if err != nil { t.Fatalf("Failed to create temporary datadir: %v", err) } @@ -1764,7 +1772,7 @@ func testRepair(t *testing.T, tt *rewindTest, snapshots bool) { db, err := rawdb.Open(rawdb.OpenOptions{ Directory: datadir, - AncientsDirectory: datadir, + AncientsDirectory: ancient, Ephemeral: true, }) if err != nil { @@ -1774,20 +1782,24 @@ func testRepair(t *testing.T, tt *rewindTest, snapshots bool) { // Initialize a fresh chain var ( - genesis = (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + gspec = &Genesis{Config: params.TestChainConfig, BaseFee: big.NewInt(params.InitialBaseFee)} + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) engine = ethash.NewFullFaker() config = &CacheConfig{ TrieCleanLimit: 256, TrieDirtyLimit: 256, TrieTimeLimit: 5 * time.Minute, SnapshotLimit: 0, // Disable snapshot by default + StateScheme: scheme, } ) + triedb.Close() if snapshots { config.SnapshotLimit = 256 config.SnapshotWait = true } - chain, err := NewBlockChain(db, config, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(db, config, gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create chain: %v", err) } @@ -1809,7 +1821,7 @@ func testRepair(t *testing.T, tt *rewindTest, snapshots bool) { t.Fatalf("Failed to import canonical chain start: %v", err) } if tt.commitBlock > 0 { - chain.stateCache.TrieDB().Commit(canonblocks[tt.commitBlock-1].Root(), true, nil) + chain.triedb.Commit(canonblocks[tt.commitBlock-1].Root(), true) if snapshots { if err := chain.snaps.Cap(canonblocks[tt.commitBlock-1].Root(), 0); err != nil { t.Fatalf("Failed to flatten snapshots: %v", err) @@ -1832,11 +1844,12 @@ func testRepair(t *testing.T, tt *rewindTest, snapshots bool) { } // Pull the plug on the database, simulating a hard crash db.Close() + chain.triedb.Close() // Start a new blockchain back up and see where the repair leads us db, err = rawdb.Open(rawdb.OpenOptions{ Directory: datadir, - AncientsDirectory: datadir, + AncientsDirectory: ancient, Ephemeral: true, }) @@ -1845,7 +1858,7 @@ func testRepair(t *testing.T, tt *rewindTest, snapshots bool) { } defer db.Close() - chain, err = NewBlockChain(db, nil, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil) + chain, err = NewBlockChain(db, config, gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } @@ -1888,11 +1901,17 @@ func testRepair(t *testing.T, tt *rewindTest, snapshots bool) { // In this case the snapshot layer of B3 is not created because of existent // state. func TestIssue23496(t *testing.T) { + //testIssue23496(t, rawdb.HashScheme) + testIssue23496(t, rawdb.PathScheme) +} + +func testIssue23496(t *testing.T, scheme string) { // It's hard to follow the test case, visualize the input //log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) // Create a temporary persistent database datadir, err := ioutil.TempDir("", "") + ancient := path.Join(datadir, "ancient") if err != nil { t.Fatalf("Failed to create temporary datadir: %v", err) } @@ -1900,7 +1919,7 @@ func TestIssue23496(t *testing.T) { db, err := rawdb.Open(rawdb.OpenOptions{ Directory: datadir, - AncientsDirectory: datadir, + AncientsDirectory: ancient, }) if err != nil { @@ -1910,7 +1929,9 @@ func TestIssue23496(t *testing.T) { // Initialize a fresh chain var ( - genesis = (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + gspec = &Genesis{Config: params.TestChainConfig, BaseFee: big.NewInt(params.InitialBaseFee)} + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) engine = ethash.NewFullFaker() config = &CacheConfig{ TrieCleanLimit: 256, @@ -1918,9 +1939,11 @@ func TestIssue23496(t *testing.T) { TrieTimeLimit: 5 * time.Minute, SnapshotLimit: 256, SnapshotWait: true, + StateScheme: scheme, } ) - chain, err := NewBlockChain(db, config, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil) + triedb.Close() + chain, err := NewBlockChain(db, config, gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create chain: %v", err) } @@ -1933,7 +1956,7 @@ func TestIssue23496(t *testing.T) { if _, err := chain.InsertChain(blocks[:1], nil); err != nil { t.Fatalf("Failed to import canonical chain start: %v", err) } - chain.stateCache.TrieDB().Commit(blocks[0].Root(), true, nil) + chain.triedb.Commit(blocks[0].Root(), true) // Insert block B2 and commit the snapshot into disk if _, err := chain.InsertChain(blocks[1:2], nil); err != nil { @@ -1947,20 +1970,20 @@ func TestIssue23496(t *testing.T) { if _, err := chain.InsertChain(blocks[2:3], nil); err != nil { t.Fatalf("Failed to import canonical chain start: %v", err) } - chain.stateCache.TrieDB().Commit(blocks[2].Root(), true, nil) + chain.triedb.Commit(blocks[2].Root(), true) // Insert the remaining blocks if _, err := chain.InsertChain(blocks[3:], nil); err != nil { t.Fatalf("Failed to import canonical chain tail: %v", err) } - // Pull the plug on the database, simulating a hard crash db.Close() + chain.triedb.Close() // Start a new blockchain back up and see where the repair leads us db, err = rawdb.Open(rawdb.OpenOptions{ Directory: datadir, - AncientsDirectory: datadir, + AncientsDirectory: ancient, Ephemeral: true, }) if err != nil { @@ -1968,7 +1991,7 @@ func TestIssue23496(t *testing.T) { } defer db.Close() - chain, err = NewBlockChain(db, nil, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil) + chain, err = NewBlockChain(db, config, gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } @@ -1980,8 +2003,12 @@ func TestIssue23496(t *testing.T) { if head := chain.CurrentFastBlock(); head.NumberU64() != uint64(4) { t.Errorf("Head fast block mismatch: have %d, want %d", head.NumberU64(), uint64(4)) } - if head := chain.CurrentBlock(); head.NumberU64() != uint64(1) { - t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), uint64(1)) + expHead := uint64(1) + if scheme == rawdb.PathScheme { + expHead = uint64(2) + } + if head := chain.CurrentBlock(); head.NumberU64() != expHead { + t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), expHead) } // Reinsert B2-B4 diff --git a/core/blockchain_sethead_test.go b/core/blockchain_sethead_test.go index 02d1fea76b..8d48e3134e 100644 --- a/core/blockchain_sethead_test.go +++ b/core/blockchain_sethead_test.go @@ -24,6 +24,7 @@ import ( "io/ioutil" "math/big" "os" + "path" "strings" "testing" "time" @@ -31,9 +32,13 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/consensus/ethash" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" ) // rewindTest is a test case for chain rollback upon user request. @@ -1950,12 +1955,18 @@ func testLongReorgedFastSyncingDeepSetHead(t *testing.T, snapshots bool) { } func testSetHead(t *testing.T, tt *rewindTest, snapshots bool) { + testSetHeadWithScheme(t, tt, snapshots, rawdb.PathScheme) + testSetHeadWithScheme(t, tt, snapshots, rawdb.HashScheme) +} + +func testSetHeadWithScheme(t *testing.T, tt *rewindTest, snapshots bool, scheme string) { // It's hard to follow the test case, visualize the input // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) // fmt.Println(tt.dump(false)) // Create a temporary persistent database datadir, err := ioutil.TempDir("", "") + ancient := path.Join(datadir, "ancient") if err != nil { t.Fatalf("Failed to create temporary datadir: %v", err) } @@ -1963,7 +1974,7 @@ func testSetHead(t *testing.T, tt *rewindTest, snapshots bool) { db, err := rawdb.Open(rawdb.OpenOptions{ Directory: datadir, - AncientsDirectory: datadir, + AncientsDirectory: ancient, Ephemeral: true, }) if err != nil { @@ -1973,20 +1984,24 @@ func testSetHead(t *testing.T, tt *rewindTest, snapshots bool) { // Initialize a fresh chain var ( - genesis = (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + gspec = &Genesis{Config: params.TestChainConfig, BaseFee: big.NewInt(params.InitialBaseFee)} + triedb = trie.NewDatabase(db, newDbConfig(scheme)) + genesis = gspec.MustCommit(db, triedb) engine = ethash.NewFullFaker() config = &CacheConfig{ TrieCleanLimit: 256, TrieDirtyLimit: 256, TrieTimeLimit: 5 * time.Minute, SnapshotLimit: 0, // Disable snapshot + StateScheme: scheme, } ) + triedb.Close() if snapshots { config.SnapshotLimit = 256 config.SnapshotWait = true } - chain, err := NewBlockChain(db, config, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(db, config, gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create chain: %v", err) } @@ -2008,7 +2023,7 @@ func testSetHead(t *testing.T, tt *rewindTest, snapshots bool) { t.Fatalf("Failed to import canonical chain start: %v", err) } if tt.commitBlock > 0 { - chain.stateCache.TrieDB().Commit(canonblocks[tt.commitBlock-1].Root(), true, nil) + chain.triedb.Commit(canonblocks[tt.commitBlock-1].Root(), true) if snapshots { if err := chain.snaps.Cap(canonblocks[tt.commitBlock-1].Root(), 0); err != nil { t.Fatalf("Failed to flatten snapshots: %v", err) @@ -2018,13 +2033,17 @@ func testSetHead(t *testing.T, tt *rewindTest, snapshots bool) { if _, err := chain.InsertChain(canonblocks[tt.commitBlock:], nil); err != nil { t.Fatalf("Failed to import canonical chain tail: %v", err) } - // Manually dereference anything not committed to not have to work with 128+ tries - for _, block := range sideblocks { - chain.stateCache.TrieDB().Dereference(block.Root()) - } - for _, block := range canonblocks { - chain.stateCache.TrieDB().Dereference(block.Root()) + // Reopen the trie database without persisting in-memory dirty nodes. + chain.triedb.Close() + dbconfig := &trie.Config{} + if scheme == rawdb.PathScheme { + dbconfig.PathDB = pathdb.Defaults + + } else { + dbconfig.HashDB = hashdb.Defaults } + chain.triedb = trie.NewDatabase(chain.db, dbconfig) + chain.stateCache = state.NewDatabaseWithNodeDB(chain.db, chain.triedb) // Force run a freeze cycle type freezer interface { Freeze(threshold uint64) error diff --git a/core/blockchain_snapshot_test.go b/core/blockchain_snapshot_test.go index 10d9f37ddf..d9f54e4a1f 100644 --- a/core/blockchain_snapshot_test.go +++ b/core/blockchain_snapshot_test.go @@ -22,9 +22,9 @@ package core import ( "bytes" "fmt" - "io/ioutil" "math/big" "os" + "path" "strings" "testing" "time" @@ -40,6 +40,7 @@ import ( // snapshotTestBasic wraps the common testing fields in the snapshot tests. type snapshotTestBasic struct { + scheme string // Disk scheme used for storing trie nodes chainBlocks int // Number of blocks to generate for the canonical chain snapshotBlock uint64 // Block number of the relevant snapshot disk layer commitBlock uint64 // Block number for which to commit the state to disk @@ -52,43 +53,38 @@ type snapshotTestBasic struct { // share fields, set in runtime datadir string + ancient string db ethdb.Database - gendb ethdb.Database + genDb ethdb.Database engine consensus.Engine + gspec *Genesis } func (basic *snapshotTestBasic) prepare(t *testing.T) (*BlockChain, []*types.Block) { // Create a temporary persistent database - datadir, err := ioutil.TempDir("", "") - if err != nil { - t.Fatalf("Failed to create temporary datadir: %v", err) - } - os.RemoveAll(datadir) + datadir := t.TempDir() + ancient := path.Join(datadir, "ancient") db, err := rawdb.Open(rawdb.OpenOptions{ Directory: datadir, - AncientsDirectory: datadir, - Ephemeral: true, + AncientsDirectory: ancient, }) if err != nil { t.Fatalf("Failed to create persistent database: %v", err) } // Initialize a fresh chain var ( - genesis = (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) - engine = ethash.NewFullFaker() - gendb = rawdb.NewMemoryDatabase() - - // Snapshot is enabled, the first snapshot is created from the Genesis. - // The snapshot memory allowance is 256MB, it means no snapshot flush - // will happen during the block insertion. - cacheConfig = defaultCacheConfig + gspec = &Genesis{ + BaseFee: big.NewInt(params.InitialBaseFee), + Config: params.AllEthashProtocolChanges, + } + engine = ethash.NewFullFaker() ) - chain, err := NewBlockChain(db, cacheConfig, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(db, DefaultCacheConfigWithScheme(basic.scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create chain: %v", err) } - blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, gendb, basic.chainBlocks, func(i int, b *BlockGen) {}, true) + genDb, blocks, _ := GenerateChainWithGenesis(gspec, engine, basic.chainBlocks, func(i int, b *BlockGen) {}) // Insert the blocks with configured settings. var breakpoints []uint64 @@ -105,7 +101,7 @@ func (basic *snapshotTestBasic) prepare(t *testing.T) (*BlockChain, []*types.Blo startPoint = point if basic.commitBlock > 0 && basic.commitBlock == point { - chain.stateCache.TrieDB().Commit(blocks[point-1].Root(), true, nil) + chain.TrieDB().Commit(blocks[point-1].Root(), false) } if basic.snapshotBlock > 0 && basic.snapshotBlock == point { // Flushing the entire snap tree into the disk, the @@ -124,9 +120,11 @@ func (basic *snapshotTestBasic) prepare(t *testing.T) (*BlockChain, []*types.Blo // Set runtime fields basic.datadir = datadir + basic.ancient = ancient basic.db = db - basic.gendb = gendb + basic.genDb = genDb basic.engine = engine + basic.gspec = gspec return chain, blocks } @@ -138,17 +136,17 @@ func (basic *snapshotTestBasic) verify(t *testing.T, chain *BlockChain, blocks [ if head := chain.CurrentHeader(); head.Number.Uint64() != basic.expHeadHeader { t.Errorf("Head header mismatch: have %d, want %d", head.Number, basic.expHeadHeader) } - if head := chain.CurrentFastBlock(); head.NumberU64() != basic.expHeadFastBlock { - t.Errorf("Head fast block mismatch: have %d, want %d", head.NumberU64(), basic.expHeadFastBlock) + if head := chain.CurrentFastBlock(); head.Number().Uint64() != basic.expHeadFastBlock { + t.Errorf("Head fast block mismatch: have %d, want %d", head.Number(), basic.expHeadFastBlock) } - if head := chain.CurrentBlock(); head.NumberU64() != basic.expHeadBlock { - t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), basic.expHeadBlock) + if head := chain.CurrentBlock(); head.Number().Uint64() != basic.expHeadBlock { + t.Errorf("Head block mismatch: have %d, want %d", head.Number(), basic.expHeadBlock) } // Check the disk layer, ensure they are matched block := chain.GetBlockByNumber(basic.expSnapshotBottom) if block == nil { - t.Errorf("The correspnding block[%d] of snapshot disk layer is missing", basic.expSnapshotBottom) + t.Errorf("The corresponding block[%d] of snapshot disk layer is missing", basic.expSnapshotBottom) } else if !bytes.Equal(chain.snaps.DiskRoot().Bytes(), block.Root().Bytes()) { t.Errorf("The snapshot disk layer root is incorrect, want %x, get %x", block.Root(), chain.snaps.DiskRoot()) } @@ -159,6 +157,7 @@ func (basic *snapshotTestBasic) verify(t *testing.T, chain *BlockChain, blocks [ } } +//nolint:unused func (basic *snapshotTestBasic) dump() string { buffer := new(strings.Builder) @@ -209,8 +208,9 @@ func (basic *snapshotTestBasic) dump() string { func (basic *snapshotTestBasic) teardown() { basic.db.Close() - basic.gendb.Close() + basic.genDb.Close() os.RemoveAll(basic.datadir) + os.RemoveAll(basic.ancient) } // snapshotTest is a test case type for normal snapshot recovery. @@ -227,7 +227,7 @@ func (snaptest *snapshotTest) test(t *testing.T) { // Restart the chain normally chain.Stop() - newchain, err := NewBlockChain(snaptest.db, nil, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) + newchain, err := NewBlockChain(snaptest.db, DefaultCacheConfigWithScheme(snaptest.scheme), snaptest.gspec, nil, snaptest.engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } @@ -236,7 +236,7 @@ func (snaptest *snapshotTest) test(t *testing.T) { snaptest.verify(t, newchain, blocks) } -// crashSnapshotTest is a test case type for innormal snapshot recovery. +// crashSnapshotTest is a test case type for irregular snapshot recovery. // It can be used for testing that restart Geth after the crash. type crashSnapshotTest struct { snapshotTestBasic @@ -251,14 +251,13 @@ func (snaptest *crashSnapshotTest) test(t *testing.T) { // Pull the plug on the database, simulating a hard crash db := chain.db db.Close() + chain.triedb.Close() // Start a new blockchain back up and see where the repair leads us newdb, err := rawdb.Open(rawdb.OpenOptions{ Directory: snaptest.datadir, - AncientsDirectory: snaptest.datadir, - Ephemeral: true, + AncientsDirectory: snaptest.ancient, }) - if err != nil { t.Fatalf("Failed to reopen persistent database: %v", err) } @@ -268,13 +267,13 @@ func (snaptest *crashSnapshotTest) test(t *testing.T) { // the crash, we do restart twice here: one after the crash and one // after the normal stop. It's used to ensure the broken snapshot // can be detected all the time. - newchain, err := NewBlockChain(newdb, nil, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) + newchain, err := NewBlockChain(newdb, DefaultCacheConfigWithScheme(snaptest.scheme), snaptest.gspec, nil, snaptest.engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } newchain.Stop() - newchain, err = NewBlockChain(newdb, nil, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) + newchain, err = NewBlockChain(newdb, DefaultCacheConfigWithScheme(snaptest.scheme), snaptest.gspec, nil, snaptest.engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } @@ -301,7 +300,7 @@ func (snaptest *gappedSnapshotTest) test(t *testing.T) { // Insert blocks without enabling snapshot if gapping is required. chain.Stop() - gappedBlocks, _ := GenerateChain(params.TestChainConfig, blocks[len(blocks)-1], snaptest.engine, snaptest.gendb, snaptest.gapped, func(i int, b *BlockGen) {}, true) + gappedBlocks, _ := GenerateChain(snaptest.gspec.Config, blocks[len(blocks)-1], snaptest.engine, snaptest.genDb, snaptest.gapped, func(i int, b *BlockGen) {}, false) // Insert a few more blocks without enabling snapshot var cacheConfig = &CacheConfig{ @@ -309,8 +308,9 @@ func (snaptest *gappedSnapshotTest) test(t *testing.T) { TrieDirtyLimit: 256, TrieTimeLimit: 5 * time.Minute, SnapshotLimit: 0, + StateScheme: snaptest.scheme, } - newchain, err := NewBlockChain(snaptest.db, cacheConfig, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) + newchain, err := NewBlockChain(snaptest.db, cacheConfig, snaptest.gspec, nil, snaptest.engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } @@ -318,7 +318,7 @@ func (snaptest *gappedSnapshotTest) test(t *testing.T) { newchain.Stop() // Restart the chain with enabling the snapshot - newchain, err = NewBlockChain(snaptest.db, nil, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) + newchain, err = NewBlockChain(snaptest.db, DefaultCacheConfigWithScheme(snaptest.scheme), snaptest.gspec, nil, snaptest.engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } @@ -346,55 +346,7 @@ func (snaptest *setHeadSnapshotTest) test(t *testing.T) { chain.SetHead(snaptest.setHead) chain.Stop() - newchain, err := NewBlockChain(snaptest.db, nil, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) - if err != nil { - t.Fatalf("Failed to recreate chain: %v", err) - } - defer newchain.Stop() - - snaptest.verify(t, newchain, blocks) -} - -// restartCrashSnapshotTest is the test type used to test this scenario: -// - have a complete snapshot -// - restart chain -// - insert more blocks with enabling the snapshot -// - commit the snapshot -// - crash -// - restart again -type restartCrashSnapshotTest struct { - snapshotTestBasic - newBlocks int -} - -func (snaptest *restartCrashSnapshotTest) test(t *testing.T) { - // It's hard to follow the test case, visualize the input - // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) - // fmt.Println(tt.dump()) - chain, blocks := snaptest.prepare(t) - - // Firstly, stop the chain properly, with all snapshot journal - // and state committed. - chain.Stop() - - newchain, err := NewBlockChain(snaptest.db, nil, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) - if err != nil { - t.Fatalf("Failed to recreate chain: %v", err) - } - newBlocks, _ := GenerateChain(params.TestChainConfig, blocks[len(blocks)-1], snaptest.engine, snaptest.gendb, snaptest.newBlocks, func(i int, b *BlockGen) {}, true) - newchain.InsertChain(newBlocks, nil) - - // Commit the entire snapshot into the disk if requested. Note only - // (a) snapshot root and (b) snapshot generator will be committed, - // the diff journal is not. - newchain.Snapshots().Cap(newBlocks[len(newBlocks)-1].Root(), 0) - - // Simulate the blockchain crash - // Don't call chain.Stop here, so that no snapshot - // journal and latest state will be committed - - // Restart the chain after the crash - newchain, err = NewBlockChain(snaptest.db, nil, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) + newchain, err := NewBlockChain(snaptest.db, DefaultCacheConfigWithScheme(snaptest.scheme), snaptest.gspec, nil, snaptest.engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } @@ -428,34 +380,39 @@ func (snaptest *wipeCrashSnapshotTest) test(t *testing.T) { TrieDirtyLimit: 256, TrieTimeLimit: 5 * time.Minute, SnapshotLimit: 0, + StateScheme: snaptest.scheme, } - newchain, err := NewBlockChain(snaptest.db, config, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) + newchain, err := NewBlockChain(snaptest.db, config, snaptest.gspec, nil, snaptest.engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } - newBlocks, _ := GenerateChain(params.TestChainConfig, blocks[len(blocks)-1], snaptest.engine, snaptest.gendb, snaptest.newBlocks, func(i int, b *BlockGen) {}, true) + newBlocks, _ := GenerateChain(snaptest.gspec.Config, blocks[len(blocks)-1], snaptest.engine, snaptest.genDb, snaptest.newBlocks, func(i int, b *BlockGen) {}, false) newchain.InsertChain(newBlocks, nil) newchain.Stop() - // Restart the chain, the wiper should starts working + // Restart the chain, the wiper should start working config = &CacheConfig{ TrieCleanLimit: 256, TrieDirtyLimit: 256, TrieTimeLimit: 5 * time.Minute, SnapshotLimit: 256, SnapshotWait: false, // Don't wait rebuild + StateScheme: snaptest.scheme, } - newchain, err = NewBlockChain(snaptest.db, config, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) + tmp, err := NewBlockChain(snaptest.db, config, snaptest.gspec, nil, snaptest.engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } + // Simulate the blockchain crash. + tmp.triedb.Close() - newchain, err = NewBlockChain(snaptest.db, nil, params.AllEthashProtocolChanges, snaptest.engine, vm.Config{}, nil, nil) + newchain, err = NewBlockChain(snaptest.db, DefaultCacheConfigWithScheme(snaptest.scheme), snaptest.gspec, nil, snaptest.engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to recreate chain: %v", err) } snaptest.verify(t, newchain, blocks) + newchain.Stop() } // Tests a Geth restart with valid snapshot. Before the shutdown, all snapshot @@ -479,20 +436,23 @@ func TestRestartWithNewSnapshot(t *testing.T) { // Expected head fast block: C8 // Expected head block : C8 // Expected snapshot disk : G - test := &snapshotTest{ - snapshotTestBasic{ - chainBlocks: 8, - snapshotBlock: 0, - commitBlock: 0, - expCanonicalBlocks: 8, - expHeadHeader: 8, - expHeadFastBlock: 8, - expHeadBlock: 8, - expSnapshotBottom: 0, // Initial disk layer built from genesis - }, - } - test.test(t) - test.teardown() + for _, scheme := range []string{rawdb.HashScheme, rawdb.PathScheme} { + test := &snapshotTest{ + snapshotTestBasic{ + scheme: scheme, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 8, + expSnapshotBottom: 0, // Initial disk layer built from genesis + }, + } + test.test(t) + test.teardown() + } } // Tests a Geth was crashed and restarts with a broken snapshot. In this case the @@ -518,20 +478,23 @@ func TestNoCommitCrashWithNewSnapshot(t *testing.T) { // Expected head fast block: C8 // Expected head block : G // Expected snapshot disk : C4 - test := &crashSnapshotTest{ - snapshotTestBasic{ - chainBlocks: 8, - snapshotBlock: 4, - commitBlock: 0, - expCanonicalBlocks: 8, - expHeadHeader: 8, - expHeadFastBlock: 8, - expHeadBlock: 0, - expSnapshotBottom: 4, // Last committed disk layer, wait recovery - }, - } - test.test(t) - test.teardown() + for _, scheme := range []string{rawdb.HashScheme, rawdb.PathScheme} { + test := &crashSnapshotTest{ + snapshotTestBasic{ + scheme: scheme, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 0, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 0, + expSnapshotBottom: 4, // Last committed disk layer, wait recovery + }, + } + test.test(t) + test.teardown() + } } // Tests a Geth was crashed and restarts with a broken snapshot. In this case the @@ -557,20 +520,23 @@ func TestLowCommitCrashWithNewSnapshot(t *testing.T) { // Expected head fast block: C8 // Expected head block : C2 // Expected snapshot disk : C4 - test := &crashSnapshotTest{ - snapshotTestBasic{ - chainBlocks: 8, - snapshotBlock: 4, - commitBlock: 2, - expCanonicalBlocks: 8, - expHeadHeader: 8, - expHeadFastBlock: 8, - expHeadBlock: 2, - expSnapshotBottom: 4, // Last committed disk layer, wait recovery - }, - } - test.test(t) - test.teardown() + for _, scheme := range []string{rawdb.HashScheme, rawdb.PathScheme} { + test := &crashSnapshotTest{ + snapshotTestBasic{ + scheme: scheme, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 2, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 2, + expSnapshotBottom: 4, // Last committed disk layer, wait recovery + }, + } + test.test(t) + test.teardown() + } } // Tests a Geth was crashed and restarts with a broken snapshot. In this case @@ -596,20 +562,27 @@ func TestHighCommitCrashWithNewSnapshot(t *testing.T) { // Expected head fast block: C8 // Expected head block : G // Expected snapshot disk : C4 - test := &crashSnapshotTest{ - snapshotTestBasic{ - chainBlocks: 8, - snapshotBlock: 4, - commitBlock: 6, - expCanonicalBlocks: 8, - expHeadHeader: 8, - expHeadFastBlock: 8, - expHeadBlock: 0, - expSnapshotBottom: 4, // Last committed disk layer, wait recovery - }, - } - test.test(t) - test.teardown() + for _, scheme := range []string{rawdb.HashScheme, rawdb.PathScheme} { + expHead := uint64(0) + if scheme == rawdb.PathScheme { + expHead = uint64(4) + } + test := &crashSnapshotTest{ + snapshotTestBasic{ + scheme: scheme, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 6, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: expHead, + expSnapshotBottom: 4, // Last committed disk layer, wait recovery + }, + } + test.test(t) + test.teardown() + } } // Tests a Geth was running with snapshot enabled. Then restarts without @@ -633,21 +606,24 @@ func TestGappedNewSnapshot(t *testing.T) { // Expected head fast block: C10 // Expected head block : C10 // Expected snapshot disk : C10 - test := &gappedSnapshotTest{ - snapshotTestBasic: snapshotTestBasic{ - chainBlocks: 8, - snapshotBlock: 0, - commitBlock: 0, - expCanonicalBlocks: 10, - expHeadHeader: 10, - expHeadFastBlock: 10, - expHeadBlock: 10, - expSnapshotBottom: 10, // Rebuilt snapshot from the latest HEAD - }, - gapped: 2, - } - test.test(t) - test.teardown() + for _, scheme := range []string{rawdb.HashScheme, rawdb.PathScheme} { + test := &gappedSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + scheme: scheme, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 10, + expHeadHeader: 10, + expHeadFastBlock: 10, + expHeadBlock: 10, + expSnapshotBottom: 10, // Rebuilt snapshot from the latest HEAD + }, + gapped: 2, + } + test.test(t) + test.teardown() + } } // Tests the Geth was running with snapshot enabled and resetHead is applied. @@ -671,21 +647,24 @@ func TestSetHeadWithNewSnapshot(t *testing.T) { // Expected head fast block: C4 // Expected head block : C4 // Expected snapshot disk : G - test := &setHeadSnapshotTest{ - snapshotTestBasic: snapshotTestBasic{ - chainBlocks: 8, - snapshotBlock: 0, - commitBlock: 0, - expCanonicalBlocks: 4, - expHeadHeader: 4, - expHeadFastBlock: 4, - expHeadBlock: 4, - expSnapshotBottom: 0, // The initial disk layer is built from the genesis - }, - setHead: 4, - } - test.test(t) - test.teardown() + for _, scheme := range []string{rawdb.HashScheme, rawdb.PathScheme} { + test := &setHeadSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + scheme: scheme, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 4, + expHeadHeader: 4, + expHeadFastBlock: 4, + expHeadBlock: 4, + expSnapshotBottom: 0, // The initial disk layer is built from the genesis + }, + setHead: 4, + } + test.test(t) + test.teardown() + } } // Tests the Geth was running with a complete snapshot and then imports a few @@ -709,19 +688,22 @@ func TestRecoverSnapshotFromWipingCrash(t *testing.T) { // Expected head fast block: C10 // Expected head block : C8 // Expected snapshot disk : C10 - test := &wipeCrashSnapshotTest{ - snapshotTestBasic: snapshotTestBasic{ - chainBlocks: 8, - snapshotBlock: 4, - commitBlock: 0, - expCanonicalBlocks: 10, - expHeadHeader: 10, - expHeadFastBlock: 10, - expHeadBlock: 10, - expSnapshotBottom: 10, - }, - newBlocks: 2, - } - test.test(t) - test.teardown() + for _, scheme := range []string{rawdb.HashScheme, rawdb.PathScheme} { + test := &wipeCrashSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + scheme: scheme, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 0, + expCanonicalBlocks: 10, + expHeadHeader: 10, + expHeadFastBlock: 10, + expHeadBlock: 10, + expSnapshotBottom: 10, + }, + newBlocks: 2, + } + test.test(t) + test.teardown() + } } diff --git a/core/blockchain_test.go b/core/blockchain_test.go index 265393a699..c24c700593 100644 --- a/core/blockchain_test.go +++ b/core/blockchain_test.go @@ -57,14 +57,15 @@ var ( // newCanonical creates a chain database, and injects a deterministic canonical // chain. Depending on the full flag, if creates either a full block chain or a // header only chain. -func newCanonical(engine consensus.Engine, n int, full bool) (ethdb.Database, *BlockChain, error) { +func newCanonical(engine consensus.Engine, n int, full bool, scheme string) (ethdb.Database, *BlockChain, error) { var ( db = rawdb.NewMemoryDatabase() - genesis = (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + gspec = &Genesis{Config: params.TestChainConfig, BaseFee: big.NewInt(params.InitialBaseFee)} + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) ) - // Initialize a fresh chain with only a genesis block - blockchain, _ := NewBlockChain(db, nil, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil) + blockchain, _ := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) // Create and inject the requested chain if n == 0 { return db, blockchain, nil @@ -86,9 +87,9 @@ func newGwei(n int64) *big.Int { } // Test fork of length N starting from block i -func testFork(t *testing.T, blockchain *BlockChain, i, n int, full bool, comparator func(td1, td2 *big.Int)) { +func testFork(t *testing.T, blockchain *BlockChain, i, n int, full bool, comparator func(td1, td2 *big.Int), scheme string) { // Copy old chain up to #i into a new db - db, blockchain2, err := newCanonical(ethash.NewFaker(), i, full) + db, blockchain2, err := newCanonical(ethash.NewFaker(), i, full, scheme) if err != nil { t.Fatal("could not make new canonical in testFork", err) } @@ -179,7 +180,7 @@ func testBlockChainImport(chain types.Blocks, blockchain *BlockChain) error { blockchain.chainmu.MustLock() rawdb.WriteTd(blockchain.db, block.Hash(), block.NumberU64(), new(big.Int).Add(block.Difficulty(), blockchain.GetTd(block.ParentHash(), block.NumberU64()-1))) rawdb.WriteBlock(blockchain.db, block) - statedb.Commit(false) + statedb.Commit(block.NumberU64(), false) blockchain.chainmu.Unlock() } return nil @@ -203,7 +204,12 @@ func testHeaderChainImport(chain []*types.Header, blockchain *BlockChain) error } func TestLastBlock(t *testing.T) { - _, blockchain, err := newCanonical(ethash.NewFaker(), 0, true) + testLastBlock(t, rawdb.HashScheme) + testLastBlock(t, rawdb.PathScheme) +} + +func testLastBlock(t *testing.T, scheme string) { + _, blockchain, err := newCanonical(ethash.NewFaker(), 0, true, scheme) if err != nil { t.Fatalf("failed to create pristine chain: %v", err) } @@ -220,14 +226,19 @@ func TestLastBlock(t *testing.T) { // Tests that given a starting canonical chain of a given size, it can be extended // with various length chains. -func TestExtendCanonicalHeaders(t *testing.T) { testExtendCanonical(t, false) } -func TestExtendCanonicalBlocks(t *testing.T) { testExtendCanonical(t, true) } - -func testExtendCanonical(t *testing.T, full bool) { +func TestExtendCanonicalHeaders(t *testing.T) { + testExtendCanonical(t, false, rawdb.HashScheme) + testExtendCanonical(t, false, rawdb.PathScheme) +} +func TestExtendCanonicalBlocks(t *testing.T) { + testExtendCanonical(t, true, rawdb.HashScheme) + testExtendCanonical(t, true, rawdb.PathScheme) +} +func testExtendCanonical(t *testing.T, full bool, scheme string) { length := 5 // Make first chain starting from genesis - _, processor, err := newCanonical(ethash.NewFaker(), length, full) + _, processor, err := newCanonical(ethash.NewFaker(), length, full, scheme) if err != nil { t.Fatalf("failed to make new canonical chain: %v", err) } @@ -240,22 +251,28 @@ func testExtendCanonical(t *testing.T, full bool) { } } // Start fork from current height - testFork(t, processor, length, 1, full, better) - testFork(t, processor, length, 2, full, better) - testFork(t, processor, length, 5, full, better) - testFork(t, processor, length, 10, full, better) + testFork(t, processor, length, 1, full, better, scheme) + testFork(t, processor, length, 2, full, better, scheme) + testFork(t, processor, length, 5, full, better, scheme) + testFork(t, processor, length, 10, full, better, scheme) } // Tests that given a starting canonical chain of a given size, creating shorter // forks do not take canonical ownership. -func TestShorterForkHeaders(t *testing.T) { testShorterFork(t, false) } -func TestShorterForkBlocks(t *testing.T) { testShorterFork(t, true) } +func TestShorterForkHeaders(t *testing.T) { + testShorterFork(t, false, rawdb.HashScheme) + testShorterFork(t, false, rawdb.PathScheme) +} +func TestShorterForkBlocks(t *testing.T) { + testShorterFork(t, true, rawdb.HashScheme) + testShorterFork(t, true, rawdb.PathScheme) +} -func testShorterFork(t *testing.T, full bool) { +func testShorterFork(t *testing.T, full bool, scheme string) { length := 10 // Make first chain starting from genesis - _, processor, err := newCanonical(ethash.NewFaker(), length, full) + _, processor, err := newCanonical(ethash.NewFaker(), length, full, scheme) if err != nil { t.Fatalf("failed to make new canonical chain: %v", err) } @@ -268,24 +285,30 @@ func testShorterFork(t *testing.T, full bool) { } } // Sum of numbers must be less than `length` for this to be a shorter fork - testFork(t, processor, 0, 3, full, worse) - testFork(t, processor, 0, 7, full, worse) - testFork(t, processor, 1, 1, full, worse) - testFork(t, processor, 1, 7, full, worse) - testFork(t, processor, 5, 3, full, worse) - testFork(t, processor, 5, 4, full, worse) + testFork(t, processor, 0, 3, full, worse, scheme) + testFork(t, processor, 0, 7, full, worse, scheme) + testFork(t, processor, 1, 1, full, worse, scheme) + testFork(t, processor, 1, 7, full, worse, scheme) + testFork(t, processor, 5, 3, full, worse, scheme) + testFork(t, processor, 5, 4, full, worse, scheme) } // Tests that given a starting canonical chain of a given size, creating longer // forks do take canonical ownership. -func TestLongerForkHeaders(t *testing.T) { testLongerFork(t, false) } -func TestLongerForkBlocks(t *testing.T) { testLongerFork(t, true) } +func TestLongerForkHeaders(t *testing.T) { + testLongerFork(t, false, rawdb.HashScheme) + testLongerFork(t, false, rawdb.PathScheme) +} +func TestLongerForkBlocks(t *testing.T) { + testLongerFork(t, true, rawdb.HashScheme) + testLongerFork(t, true, rawdb.PathScheme) +} -func testLongerFork(t *testing.T, full bool) { +func testLongerFork(t *testing.T, full bool, scheme string) { length := 10 // Make first chain starting from genesis - _, processor, err := newCanonical(ethash.NewFaker(), length, full) + _, processor, err := newCanonical(ethash.NewFaker(), length, full, scheme) if err != nil { t.Fatalf("failed to make new canonical chain: %v", err) } @@ -298,24 +321,30 @@ func testLongerFork(t *testing.T, full bool) { } } // Sum of numbers must be greater than `length` for this to be a longer fork - testFork(t, processor, 0, 11, full, better) - testFork(t, processor, 0, 15, full, better) - testFork(t, processor, 1, 10, full, better) - testFork(t, processor, 1, 12, full, better) - testFork(t, processor, 5, 6, full, better) - testFork(t, processor, 5, 8, full, better) + testFork(t, processor, 0, 11, full, better, scheme) + testFork(t, processor, 0, 15, full, better, scheme) + testFork(t, processor, 1, 10, full, better, scheme) + testFork(t, processor, 1, 12, full, better, scheme) + testFork(t, processor, 5, 6, full, better, scheme) + testFork(t, processor, 5, 8, full, better, scheme) } // Tests that given a starting canonical chain of a given size, creating equal // forks do take canonical ownership. -func TestEqualForkHeaders(t *testing.T) { testEqualFork(t, false) } -func TestEqualForkBlocks(t *testing.T) { testEqualFork(t, true) } +func TestEqualForkHeaders(t *testing.T) { + testEqualFork(t, false, rawdb.HashScheme) + testEqualFork(t, false, rawdb.PathScheme) +} +func TestEqualForkBlocks(t *testing.T) { + testEqualFork(t, true, rawdb.HashScheme) + testEqualFork(t, true, rawdb.PathScheme) +} -func testEqualFork(t *testing.T, full bool) { +func testEqualFork(t *testing.T, full bool, scheme string) { length := 10 // Make first chain starting from genesis - _, processor, err := newCanonical(ethash.NewFaker(), length, full) + _, processor, err := newCanonical(ethash.NewFaker(), length, full, scheme) if err != nil { t.Fatalf("failed to make new canonical chain: %v", err) } @@ -328,21 +357,27 @@ func testEqualFork(t *testing.T, full bool) { } } // Sum of numbers must be equal to `length` for this to be an equal fork - testFork(t, processor, 0, 10, full, equal) - testFork(t, processor, 1, 9, full, equal) - testFork(t, processor, 2, 8, full, equal) - testFork(t, processor, 5, 5, full, equal) - testFork(t, processor, 6, 4, full, equal) - testFork(t, processor, 9, 1, full, equal) + testFork(t, processor, 0, 10, full, equal, scheme) + testFork(t, processor, 1, 9, full, equal, scheme) + testFork(t, processor, 2, 8, full, equal, scheme) + testFork(t, processor, 5, 5, full, equal, scheme) + testFork(t, processor, 6, 4, full, equal, scheme) + testFork(t, processor, 9, 1, full, equal, scheme) } // Tests that chains missing links do not get accepted by the processor. -func TestBrokenHeaderChain(t *testing.T) { testBrokenChain(t, false) } -func TestBrokenBlockChain(t *testing.T) { testBrokenChain(t, true) } +func TestBrokenHeaderChain(t *testing.T) { + testBrokenChain(t, false, rawdb.HashScheme) + testBrokenChain(t, false, rawdb.PathScheme) +} +func TestBrokenBlockChain(t *testing.T) { + testBrokenChain(t, true, rawdb.HashScheme) + testBrokenChain(t, true, rawdb.PathScheme) +} -func testBrokenChain(t *testing.T, full bool) { +func testBrokenChain(t *testing.T, full bool, scheme string) { // Make chain starting from genesis - db, blockchain, err := newCanonical(ethash.NewFaker(), 10, full) + db, blockchain, err := newCanonical(ethash.NewFaker(), 10, full, scheme) if err != nil { t.Fatalf("failed to make new canonical chain: %v", err) } @@ -364,19 +399,31 @@ func testBrokenChain(t *testing.T, full bool) { // Tests that reorganising a long difficult chain after a short easy one // overwrites the canonical numbers and links in the database. -func TestReorgLongHeaders(t *testing.T) { testReorgLong(t, false) } -func TestReorgLongBlocks(t *testing.T) { testReorgLong(t, true) } +func TestReorgLongHeaders(t *testing.T) { + testReorgLong(t, false, rawdb.HashScheme) + testReorgLong(t, false, rawdb.PathScheme) +} +func TestReorgLongBlocks(t *testing.T) { + testReorgLong(t, true, rawdb.HashScheme) + testReorgLong(t, true, rawdb.PathScheme) +} -func testReorgLong(t *testing.T, full bool) { - testReorg(t, []int64{0, 0, -9}, []int64{0, 0, 0, -9}, 393280+params.GenesisDifficulty.Int64(), full) +func testReorgLong(t *testing.T, full bool, scheme string) { + testReorg(t, []int64{0, 0, -9}, []int64{0, 0, 0, -9}, 393280+params.GenesisDifficulty.Int64(), full, scheme) } // Tests that reorganising a short difficult chain after a long easy one // overwrites the canonical numbers and links in the database. -func TestReorgShortHeaders(t *testing.T) { testReorgShort(t, false) } -func TestReorgShortBlocks(t *testing.T) { testReorgShort(t, true) } +func TestReorgShortHeaders(t *testing.T) { + testReorgShort(t, false, rawdb.HashScheme) + testReorgShort(t, false, rawdb.PathScheme) +} +func TestReorgShortBlocks(t *testing.T) { + testReorgShort(t, true, rawdb.HashScheme) + testReorgShort(t, true, rawdb.PathScheme) +} -func testReorgShort(t *testing.T, full bool) { +func testReorgShort(t *testing.T, full bool, scheme string) { // Create a long easy chain vs. a short heavy one. Due to difficulty adjustment // we need a fairly long chain of blocks with different difficulties for a short // one to become heavyer than a long one. The 96 is an empirical value. @@ -388,12 +435,12 @@ func testReorgShort(t *testing.T, full bool) { for i := 0; i < len(diff); i++ { diff[i] = -9 } - testReorg(t, easy, diff, 12615120+params.GenesisDifficulty.Int64(), full) + testReorg(t, easy, diff, 12615120+params.GenesisDifficulty.Int64(), full, scheme) } -func testReorg(t *testing.T, first, second []int64, td int64, full bool) { +func testReorg(t *testing.T, first, second []int64, td int64, full bool, scheme string) { // Create a pristine chain and database - db, blockchain, err := newCanonical(ethash.NewFaker(), 0, full) + db, blockchain, err := newCanonical(ethash.NewFaker(), 0, full, scheme) if err != nil { t.Fatalf("failed to create pristine chain: %v", err) } @@ -461,12 +508,18 @@ func testReorg(t *testing.T, first, second []int64, td int64, full bool) { } // Tests that the insertion functions detect banned hashes. -func TestBadHeaderHashes(t *testing.T) { testBadHashes(t, false) } -func TestBadBlockHashes(t *testing.T) { testBadHashes(t, true) } +func TestBadHeaderHashes(t *testing.T) { + testBadHashes(t, false, rawdb.HashScheme) + testBadHashes(t, false, rawdb.PathScheme) +} +func TestBadBlockHashes(t *testing.T) { + testBadHashes(t, true, rawdb.HashScheme) + testBadHashes(t, true, rawdb.PathScheme) +} -func testBadHashes(t *testing.T, full bool) { +func testBadHashes(t *testing.T, full bool, scheme string) { // Create a pristine chain and database - db, blockchain, err := newCanonical(ethash.NewFaker(), 0, full) + db, blockchain, err := newCanonical(ethash.NewFaker(), 0, full, scheme) if err != nil { t.Fatalf("failed to create pristine chain: %v", err) } @@ -495,12 +548,18 @@ func testBadHashes(t *testing.T, full bool) { // Tests that bad hashes are detected on boot, and the chain rolled back to a // good state prior to the bad hash. -func TestReorgBadHeaderHashes(t *testing.T) { testReorgBadHashes(t, false) } -func TestReorgBadBlockHashes(t *testing.T) { testReorgBadHashes(t, true) } +func TestReorgBadHeaderHashes(t *testing.T) { + testReorgBadHashes(t, false, rawdb.HashScheme) + testReorgBadHashes(t, false, rawdb.PathScheme) +} +func TestReorgBadBlockHashes(t *testing.T) { + testReorgBadHashes(t, true, rawdb.HashScheme) + testReorgBadHashes(t, true, rawdb.PathScheme) +} -func testReorgBadHashes(t *testing.T, full bool) { +func testReorgBadHashes(t *testing.T, full bool, scheme string) { // Create a pristine chain and database - db, blockchain, err := newCanonical(ethash.NewFaker(), 0, full) + db, blockchain, err := newCanonical(ethash.NewFaker(), 0, full, scheme) if err != nil { t.Fatalf("failed to create pristine chain: %v", err) } @@ -530,7 +589,8 @@ func testReorgBadHashes(t *testing.T, full bool) { blockchain.Stop() // Create a new BlockChain and check that it rolled back the state. - ncm, err := NewBlockChain(blockchain.db, nil, blockchain.chainConfig, ethash.NewFaker(), vm.Config{}, nil, nil) + gspec := &Genesis{Config: blockchain.chainConfig} + ncm, err := NewBlockChain(blockchain.db, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create new chain manager: %v", err) } @@ -550,13 +610,19 @@ func testReorgBadHashes(t *testing.T, full bool) { } // Tests chain insertions in the face of one entity containing an invalid nonce. -func TestHeadersInsertNonceError(t *testing.T) { testInsertNonceError(t, false) } -func TestBlocksInsertNonceError(t *testing.T) { testInsertNonceError(t, true) } +func TestHeadersInsertNonceError(t *testing.T) { + testInsertNonceError(t, false, rawdb.HashScheme) + testInsertNonceError(t, false, rawdb.PathScheme) +} +func TestBlocksInsertNonceError(t *testing.T) { + testInsertNonceError(t, true, rawdb.HashScheme) + testInsertNonceError(t, true, rawdb.PathScheme) +} -func testInsertNonceError(t *testing.T, full bool) { +func testInsertNonceError(t *testing.T, full bool, scheme string) { for i := 1; i < 25 && !t.Failed(); i++ { // Create a pristine chain and database - db, blockchain, err := newCanonical(ethash.NewFaker(), 0, full) + db, blockchain, err := newCanonical(ethash.NewFaker(), 0, full, scheme) if err != nil { t.Fatalf("failed to create pristine chain: %v", err) } @@ -608,6 +674,11 @@ func testInsertNonceError(t *testing.T, full bool) { // Tests that fast importing a block chain produces the same chain data as the // classical full block processing. func TestFastVsFullChains(t *testing.T) { + testFastVsFullChains(t, rawdb.HashScheme) + testFastVsFullChains(t, rawdb.PathScheme) +} + +func testFastVsFullChains(t *testing.T, scheme string) { // Configure and generate a sample block chain var ( gendb = rawdb.NewMemoryDatabase() @@ -619,7 +690,8 @@ func TestFastVsFullChains(t *testing.T) { Alloc: GenesisAlloc{address: {Balance: funds}}, BaseFee: big.NewInt(params.InitialBaseFee), } - genesis = gspec.MustCommit(gendb) + triedb = trie.NewDatabase(gendb, nil) + genesis = gspec.MustCommit(gendb, triedb) signer = types.LatestSigner(gspec.Config) ) blocks, receipts := GenerateChain(gspec.Config, genesis, ethash.NewFaker(), gendb, 1024, func(i int, block *BlockGen) { @@ -642,8 +714,8 @@ func TestFastVsFullChains(t *testing.T) { }, true) // Import the chain as an archive node for the comparison baseline archiveDb := rawdb.NewMemoryDatabase() - gspec.MustCommit(archiveDb) - archive, _ := NewBlockChain(archiveDb, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + gspec.MustCommit(archiveDb, trie.NewDatabase(archiveDb, newDbConfig(scheme))) + archive, _ := NewBlockChain(archiveDb, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer archive.Stop() if n, err := archive.InsertChain(blocks, nil); err != nil { @@ -651,8 +723,8 @@ func TestFastVsFullChains(t *testing.T) { } // Fast import the chain as a non-archive node to test fastDb := rawdb.NewMemoryDatabase() - gspec.MustCommit(fastDb) - fast, _ := NewBlockChain(fastDb, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + gspec.MustCommit(fastDb, trie.NewDatabase(fastDb, newDbConfig(scheme))) + fast, _ := NewBlockChain(fastDb, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer fast.Stop() headers := make([]*types.Header, len(blocks)) @@ -675,8 +747,9 @@ func TestFastVsFullChains(t *testing.T) { if err != nil { t.Fatalf("failed to create temp freezer db: %v", err) } - gspec.MustCommit(ancientDb) - ancient, _ := NewBlockChain(ancientDb, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + triedb = trie.NewDatabase(ancientDb, nil) + gspec.MustCommit(ancientDb, triedb) + ancient, _ := NewBlockChain(ancientDb, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer ancient.Stop() if n, err := ancient.InsertHeaderChain(headers, 1); err != nil { @@ -744,6 +817,11 @@ func TestFastVsFullChains(t *testing.T) { // Tests that various import methods move the chain head pointers to the correct // positions. func TestLightVsFastVsFullChainHeads(t *testing.T) { + testLightVsFastVsFullChainHeads(t, rawdb.HashScheme) + testLightVsFastVsFullChainHeads(t, rawdb.PathScheme) +} + +func testLightVsFastVsFullChainHeads(t *testing.T, scheme string) { // Configure and generate a sample block chain var ( gendb = rawdb.NewMemoryDatabase() @@ -755,7 +833,8 @@ func TestLightVsFastVsFullChainHeads(t *testing.T) { Alloc: GenesisAlloc{address: {Balance: funds}}, BaseFee: big.NewInt(params.InitialBaseFee), } - genesis = gspec.MustCommit(gendb) + triedb = trie.NewDatabase(gendb, nil) + genesis = gspec.MustCommit(gendb, triedb) ) height := uint64(1024) blocks, receipts := GenerateChain(gspec.Config, genesis, ethash.NewFaker(), gendb, int(height), nil, true) @@ -771,7 +850,7 @@ func TestLightVsFastVsFullChainHeads(t *testing.T) { if err != nil { t.Fatalf("failed to create temp freezer db: %v", err) } - gspec.MustCommit(db) + gspec.MustCommit(db, trie.NewDatabase(db, nil)) return db, func() { os.RemoveAll(dir) } } // Configure a subchain to roll back @@ -791,14 +870,16 @@ func TestLightVsFastVsFullChainHeads(t *testing.T) { t.Errorf("%s head header mismatch: have #%v, want #%v", kind, num, header) } } + // Import the chain as an archive node and ensure all pointers are updated archiveDb, delfn := makeDb() defer delfn() archiveCaching := *defaultCacheConfig archiveCaching.TrieDirtyDisabled = true + archiveCaching.StateScheme = scheme - archive, _ := NewBlockChain(archiveDb, &archiveCaching, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + archive, _ := NewBlockChain(archiveDb, &archiveCaching, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) if n, err := archive.InsertChain(blocks, nil); err != nil { t.Fatalf("failed to process block %d: %v", n, err) } @@ -811,7 +892,7 @@ func TestLightVsFastVsFullChainHeads(t *testing.T) { // Import the chain as a non-archive node and ensure all pointers are updated fastDb, delfn := makeDb() defer delfn() - fast, _ := NewBlockChain(fastDb, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + fast, _ := NewBlockChain(fastDb, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer fast.Stop() headers := make([]*types.Header, len(blocks)) @@ -831,7 +912,7 @@ func TestLightVsFastVsFullChainHeads(t *testing.T) { // Import the chain as a ancient-first node and ensure all pointers are updated ancientDb, delfn := makeDb() defer delfn() - ancient, _ := NewBlockChain(ancientDb, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + ancient, _ := NewBlockChain(ancientDb, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer ancient.Stop() if n, err := ancient.InsertHeaderChain(headers, 1); err != nil { @@ -850,7 +931,7 @@ func TestLightVsFastVsFullChainHeads(t *testing.T) { // Import the chain as a light node and ensure all pointers are updated lightDb, delfn := makeDb() defer delfn() - light, _ := NewBlockChain(lightDb, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + light, _ := NewBlockChain(lightDb, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) if n, err := light.InsertHeaderChain(headers, 1); err != nil { t.Fatalf("failed to insert header %d: %v", n, err) } @@ -863,6 +944,11 @@ func TestLightVsFastVsFullChainHeads(t *testing.T) { // Tests that chain reorganisations handle transaction removals and reinsertions. func TestChainTxReorgs(t *testing.T) { + testChainTxReorgs(t, rawdb.HashScheme) + testChainTxReorgs(t, rawdb.PathScheme) +} + +func testChainTxReorgs(t *testing.T, scheme string) { var ( key1, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") key2, _ = crypto.HexToECDSA("8a1f9a8f95be41cd7ccb6168179afb4504aefe388d1e14474d32c45c72ce7b7a") @@ -880,10 +966,10 @@ func TestChainTxReorgs(t *testing.T) { addr3: {Balance: big.NewInt(1000000000000000)}, }, } - genesis = gspec.MustCommit(db) + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) signer = types.LatestSigner(gspec.Config) ) - // Create two transactions shared between the chains: // - postponed: transaction included at a later block in the forked chain // - swapped: transaction included at the same block number in the forked chain @@ -919,7 +1005,7 @@ func TestChainTxReorgs(t *testing.T) { } }, true) // Import the chain. This runs all block validation rules. - blockchain, _ := NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + blockchain, _ := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) if i, err := blockchain.InsertChain(chain, nil); err != nil { t.Fatalf("failed to insert original chain[%d]: %v", i, err) } @@ -978,6 +1064,11 @@ func TestChainTxReorgs(t *testing.T) { } func TestLogReorgs(t *testing.T) { + testLogReorgs(t, rawdb.HashScheme) + testLogReorgs(t, rawdb.PathScheme) +} + +func testLogReorgs(t *testing.T, scheme string) { var ( key1, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") addr1 = crypto.PubkeyToAddress(key1.PublicKey) @@ -985,11 +1076,11 @@ func TestLogReorgs(t *testing.T) { // this code generates a log code = common.Hex2Bytes("60606040525b7f24ec1d3ff24c2f6ff210738839dbc339cd45a5294d85c79361016243157aae7b60405180905060405180910390a15b600a8060416000396000f360606040526008565b00") gspec = &Genesis{Config: params.TestChainConfig, Alloc: GenesisAlloc{addr1: {Balance: big.NewInt(10000000000000000)}}} - genesis = gspec.MustCommit(db) + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) signer = types.LatestSigner(gspec.Config) ) - - blockchain, _ := NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + blockchain, _ := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer blockchain.Stop() rmLogsCh := make(chan RemovedLogsEvent) @@ -1034,15 +1125,20 @@ var logCode = common.Hex2Bytes("60606040525b7f24ec1d3ff24c2f6ff210738839dbc339cd // This test checks that log events and RemovedLogsEvent are sent // when the chain reorganizes. func TestLogRebirth(t *testing.T) { + testLogRebirth(t, rawdb.HashScheme) + testLogRebirth(t, rawdb.PathScheme) +} + +func testLogRebirth(t *testing.T, scheme string) { var ( key1, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") addr1 = crypto.PubkeyToAddress(key1.PublicKey) db = rawdb.NewMemoryDatabase() gspec = &Genesis{Config: params.TestChainConfig, Alloc: GenesisAlloc{addr1: {Balance: big.NewInt(10000000000000000)}}} - genesis = gspec.MustCommit(db) + genesis = gspec.MustCommit(db, trie.NewDatabase(db, nil)) signer = types.LatestSigner(gspec.Config) engine = ethash.NewFaker() - blockchain, _ = NewBlockChain(db, nil, gspec.Config, engine, vm.Config{}, nil, nil) + blockchain, _ = NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) ) defer blockchain.Stop() @@ -1098,14 +1194,19 @@ func TestLogRebirth(t *testing.T) { // This test is a variation of TestLogRebirth. It verifies that log events are emitted // when a side chain containing log events overtakes the canonical chain. func TestSideLogRebirth(t *testing.T) { + testSideLogRebirth(t, rawdb.HashScheme) + testSideLogRebirth(t, rawdb.PathScheme) +} + +func testSideLogRebirth(t *testing.T, scheme string) { var ( key1, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") addr1 = crypto.PubkeyToAddress(key1.PublicKey) db = rawdb.NewMemoryDatabase() gspec = &Genesis{Config: params.TestChainConfig, Alloc: GenesisAlloc{addr1: {Balance: big.NewInt(10000000000000000)}}} - genesis = gspec.MustCommit(db) + genesis = gspec.MustCommit(db, trie.NewDatabase(db, nil)) signer = types.LatestSigner(gspec.Config) - blockchain, _ = NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + blockchain, _ = NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) ) defer blockchain.Stop() @@ -1168,6 +1269,11 @@ func checkLogEvents(t *testing.T, logsCh <-chan []*types.Log, rmLogsCh <-chan Re } func TestReorgSideEvent(t *testing.T) { + testReorgSideEvent(t, rawdb.HashScheme) + testReorgSideEvent(t, rawdb.PathScheme) +} + +func testReorgSideEvent(t *testing.T, scheme string) { var ( db = rawdb.NewMemoryDatabase() key1, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") @@ -1176,11 +1282,11 @@ func TestReorgSideEvent(t *testing.T) { Config: params.TestChainConfig, Alloc: GenesisAlloc{addr1: {Balance: big.NewInt(10000000000000000)}}, } - genesis = gspec.MustCommit(db) + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) signer = types.LatestSigner(gspec.Config) ) - - blockchain, _ := NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + blockchain, _ := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer blockchain.Stop() chain, _ := GenerateChain(gspec.Config, genesis, ethash.NewFaker(), db, 3, func(i int, gen *BlockGen) {}, true) @@ -1252,7 +1358,12 @@ done: // Tests if the canonical block can be fetched from the database during chain insertion. func TestCanonicalBlockRetrieval(t *testing.T) { - _, blockchain, err := newCanonical(ethash.NewFaker(), 0, true) + testCanonicalBlockRetrieval(t, rawdb.HashScheme) + testCanonicalBlockRetrieval(t, rawdb.PathScheme) +} + +func testCanonicalBlockRetrieval(t *testing.T, scheme string) { + _, blockchain, err := newCanonical(ethash.NewFaker(), 0, true, scheme) if err != nil { t.Fatalf("failed to create pristine chain: %v", err) } @@ -1298,6 +1409,11 @@ func TestCanonicalBlockRetrieval(t *testing.T) { } func TestEIP155Transition(t *testing.T) { + testEIP155Transition(t, rawdb.HashScheme) + testEIP155Transition(t, rawdb.PathScheme) +} + +func testEIP155Transition(t *testing.T, scheme string) { // Configure and generate a sample block chain var ( db = rawdb.NewMemoryDatabase() @@ -1309,10 +1425,10 @@ func TestEIP155Transition(t *testing.T) { Config: ¶ms.ChainConfig{ChainID: big.NewInt(1), EIP150Block: big.NewInt(0), EIP155Block: big.NewInt(2), HomesteadBlock: new(big.Int)}, Alloc: GenesisAlloc{address: {Balance: funds}, deleteAddr: {Balance: new(big.Int)}}, } - genesis = gspec.MustCommit(db) + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) ) - - blockchain, _ := NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + blockchain, _ := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer blockchain.Stop() blocks, _ := GenerateChain(gspec.Config, genesis, ethash.NewFaker(), db, 4, func(i int, block *BlockGen) { @@ -1401,6 +1517,11 @@ func TestEIP155Transition(t *testing.T) { } func TestEIP161AccountRemoval(t *testing.T) { + testEIP161AccountRemoval(t, rawdb.HashScheme) + testEIP161AccountRemoval(t, rawdb.PathScheme) +} + +func testEIP161AccountRemoval(t *testing.T, scheme string) { // Configure and generate a sample block chain var ( db = rawdb.NewMemoryDatabase() @@ -1418,9 +1539,10 @@ func TestEIP161AccountRemoval(t *testing.T) { }, Alloc: GenesisAlloc{address: {Balance: funds}}, } - genesis = gspec.MustCommit(db) + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) ) - blockchain, _ := NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + blockchain, _ := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer blockchain.Stop() blocks, _ := GenerateChain(gspec.Config, genesis, ethash.NewFaker(), db, 3, func(i int, block *BlockGen) { @@ -1473,11 +1595,16 @@ func TestEIP161AccountRemoval(t *testing.T) { // // https://github.com/ethereum/go-ethereum/pull/15941 func TestBlockchainHeaderchainReorgConsistency(t *testing.T) { + testBlockchainHeaderchainReorgConsistency(t, rawdb.HashScheme) + testBlockchainHeaderchainReorgConsistency(t, rawdb.PathScheme) +} + +func testBlockchainHeaderchainReorgConsistency(t *testing.T, scheme string) { // Generate a canonical chain to act as the main dataset engine := ethash.NewFaker() db := rawdb.NewMemoryDatabase() - genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db, trie.NewDatabase(db, nil)) blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 64, func(i int, b *BlockGen) { b.SetCoinbase(common.Address{1}) }, true) // Generate a bunch of fork blocks, each side forking from the canonical chain @@ -1493,9 +1620,10 @@ func TestBlockchainHeaderchainReorgConsistency(t *testing.T) { // Import the canonical and fork chain side by side, verifying the current block // and current header consistency diskdb := rawdb.NewMemoryDatabase() - (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb) + (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb, trie.NewDatabase(diskdb, newDbConfig(scheme))) + gspec := &Genesis{Config: params.TestChainConfig} - chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -1522,7 +1650,7 @@ func TestTrieForkGC(t *testing.T) { engine := ethash.NewFaker() db := rawdb.NewMemoryDatabase() - genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db, trie.NewDatabase(db, newDbConfig(rawdb.HashScheme))) blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 2*DefaultTriesInMemory, func(i int, b *BlockGen) { b.SetCoinbase(common.Address{1}) }, true) // Generate a bunch of fork blocks, each side forking from the canonical chain @@ -1537,9 +1665,10 @@ func TestTrieForkGC(t *testing.T) { } // Import the canonical and fork chain side by side, forcing the trie cache to cache both diskdb := rawdb.NewMemoryDatabase() - (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb) + (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb, trie.NewDatabase(diskdb, newDbConfig(rawdb.HashScheme))) + gspec := &Genesis{Config: params.TestChainConfig} - chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(diskdb, nil, gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -1553,10 +1682,10 @@ func TestTrieForkGC(t *testing.T) { } // Dereference all the recent tries and ensure no past trie is left in for i := 0; i < DefaultTriesInMemory; i++ { - chain.stateCache.TrieDB().Dereference(blocks[len(blocks)-1-i].Root()) - chain.stateCache.TrieDB().Dereference(forks[len(blocks)-1-i].Root()) + chain.TrieDB().Dereference(blocks[len(blocks)-1-i].Root()) + chain.TrieDB().Dereference(forks[len(blocks)-1-i].Root()) } - if len(chain.stateCache.TrieDB().Nodes()) > 0 { + if nodes, _ := chain.TrieDB().Size(); nodes > 0 { t.Fatalf("stale tries still alive after garbase collection") } } @@ -1564,11 +1693,16 @@ func TestTrieForkGC(t *testing.T) { // Tests that doing large reorgs works even if the state associated with the // forking point is not available any more. func TestLargeReorgTrieGC(t *testing.T) { + testLargeReorgTrieGC(t, rawdb.HashScheme) + testLargeReorgTrieGC(t, rawdb.PathScheme) +} + +func testLargeReorgTrieGC(t *testing.T, scheme string) { // Generate the original common chain segment and the two competing forks engine := ethash.NewFaker() db := rawdb.NewMemoryDatabase() - genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db, trie.NewDatabase(db, nil)) shared, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 64, func(i int, b *BlockGen) { b.SetCoinbase(common.Address{1}) }, true) original, _ := GenerateChain(params.TestChainConfig, shared[len(shared)-1], engine, db, 2*DefaultTriesInMemory, func(i int, b *BlockGen) { b.SetCoinbase(common.Address{2}) }, true) @@ -1576,9 +1710,12 @@ func TestLargeReorgTrieGC(t *testing.T) { // Import the shared chain and the original canonical one diskdb := rawdb.NewMemoryDatabase() - (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb) + (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb, trie.NewDatabase(diskdb, newDbConfig(scheme))) + gspec := &Genesis{Config: params.TestChainConfig} + db, _ = rawdb.NewDatabaseWithFreezer(rawdb.NewMemoryDatabase(), t.TempDir(), "", false) + defer db.Close() - chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -1589,7 +1726,7 @@ func TestLargeReorgTrieGC(t *testing.T) { t.Fatalf("failed to insert original chain: %v", err) } // Ensure that the state associated with the forking point is pruned away - if node, _ := chain.stateCache.TrieDB().Node(shared[len(shared)-1].Root()); node != nil { + if chain.HasState(shared[len(shared)-1].Root()) { t.Fatalf("common-but-old ancestor still cache") } // Import the competitor chain without exceeding the canonical's TD and ensure @@ -1598,7 +1735,7 @@ func TestLargeReorgTrieGC(t *testing.T) { t.Fatalf("failed to insert competitor chain: %v", err) } for i, block := range competitor[:len(competitor)-2] { - if node, _ := chain.stateCache.TrieDB().Node(block.Root()); node != nil { + if chain.HasState(block.Root()) { t.Fatalf("competitor %d: low TD chain became processed", i) } } @@ -1607,14 +1744,30 @@ func TestLargeReorgTrieGC(t *testing.T) { if _, err := chain.InsertChain(competitor[len(competitor)-2:], nil); err != nil { t.Fatalf("failed to finalize competitor chain: %v", err) } - for i, block := range competitor[:len(competitor)-DefaultTriesInMemory] { - if node, _ := chain.stateCache.TrieDB().Node(block.Root()); node != nil { + // In path-based trie database implementation, it will keep 128 diff + 1 disk + // layers, totally 129 latest states available. In hash-based it's 128. + states := 128 + if scheme == rawdb.PathScheme { + states = states + 1 + } + for i, block := range competitor[:len(competitor)-states] { + if chain.HasState(block.Root()) { + t.Fatalf("competitor %d: unexpected competing chain state", i) + } + } + for i, block := range competitor[len(competitor)-states:] { + if !chain.HasState(block.Root()) { t.Fatalf("competitor %d: competing chain state missing", i) } } } func TestBlockchainRecovery(t *testing.T) { + testBlockchainRecovery(t, rawdb.HashScheme) + testBlockchainRecovery(t, rawdb.PathScheme) +} + +func testBlockchainRecovery(t *testing.T, scheme string) { // Configure and generate a sample block chain var ( gendb = rawdb.NewMemoryDatabase() @@ -1622,7 +1775,8 @@ func TestBlockchainRecovery(t *testing.T) { address = crypto.PubkeyToAddress(key.PublicKey) funds = big.NewInt(1000000000) gspec = &Genesis{Config: params.TestChainConfig, Alloc: GenesisAlloc{address: {Balance: funds}}} - genesis = gspec.MustCommit(gendb) + triedb = trie.NewDatabase(gendb, nil) + genesis = gspec.MustCommit(gendb, triedb) ) height := uint64(1024) blocks, receipts := GenerateChain(gspec.Config, genesis, ethash.NewFaker(), gendb, int(height), nil, true) @@ -1638,8 +1792,8 @@ func TestBlockchainRecovery(t *testing.T) { if err != nil { t.Fatalf("failed to create temp freezer db: %v", err) } - gspec.MustCommit(ancientDb) - ancient, _ := NewBlockChain(ancientDb, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + gspec.MustCommit(ancientDb, trie.NewDatabase(ancientDb, nil)) + ancient, _ := NewBlockChain(ancientDb, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) headers := make([]*types.Header, len(blocks)) for i, block := range blocks { @@ -1659,7 +1813,7 @@ func TestBlockchainRecovery(t *testing.T) { rawdb.WriteHeadFastBlockHash(ancientDb, midBlock.Hash()) // Reopen broken blockchain again - ancient, _ = NewBlockChain(ancientDb, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + ancient, _ = NewBlockChain(ancientDb, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer ancient.Stop() if num := ancient.CurrentBlock().NumberU64(); num != 0 { t.Errorf("head block mismatch: have #%v, want #%v", num, 0) @@ -1674,8 +1828,13 @@ func TestBlockchainRecovery(t *testing.T) { // This test checks that InsertReceiptChain will roll back correctly when attempting to insert a side chain. func TestInsertReceiptChainRollback(t *testing.T) { + testInsertReceiptChainRollback(t, rawdb.HashScheme) + testInsertReceiptChainRollback(t, rawdb.PathScheme) +} + +func testInsertReceiptChainRollback(t *testing.T, scheme string) { // Generate forked chain. The returned BlockChain object is used to process the side chain blocks. - tmpChain, sideblocks, canonblocks, err := getLongAndShortChains() + tmpChain, sideblocks, canonblocks, err := getLongAndShortChains(scheme) if err != nil { t.Fatal(err) } @@ -1710,8 +1869,8 @@ func TestInsertReceiptChainRollback(t *testing.T) { t.Fatalf("failed to create temp freezer db: %v", err) } gspec := Genesis{Config: params.AllEthashProtocolChanges, BaseFee: big.NewInt(params.InitialBaseFee)} - gspec.MustCommit(ancientDb) - ancientChain, _ := NewBlockChain(ancientDb, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + gspec.MustCommit(ancientDb, trie.NewDatabase(ancientDb, nil)) + ancientChain, _ := NewBlockChain(ancientDb, DefaultCacheConfigWithScheme(scheme), &gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer ancientChain.Stop() // Import the canonical header chain. @@ -1756,10 +1915,15 @@ func TestInsertReceiptChainRollback(t *testing.T) { // - https://github.com/ethereum/go-ethereum/issues/18977 // - https://github.com/ethereum/go-ethereum/pull/18988 func TestLowDiffLongChain(t *testing.T) { + testLowDiffLongChain(t, rawdb.HashScheme) + testLowDiffLongChain(t, rawdb.PathScheme) +} + +func testLowDiffLongChain(t *testing.T, scheme string) { // Generate a canonical chain to act as the main dataset engine := ethash.NewFaker() db := rawdb.NewMemoryDatabase() - genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db, trie.NewDatabase(db, newDbConfig(rawdb.HashScheme))) // We must use a pretty long chain to ensure that the fork doesn't overtake us // until after at least 128 blocks post tip @@ -1769,10 +1933,12 @@ func TestLowDiffLongChain(t *testing.T) { }, true) // Import the canonical chain - diskdb := rawdb.NewMemoryDatabase() - (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb) + diskdb, _ := rawdb.NewDatabaseWithFreezer(rawdb.NewMemoryDatabase(), t.TempDir(), "", false) + defer diskdb.Close() + (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb, trie.NewDatabase(diskdb, newDbConfig(rawdb.HashScheme))) + gspec := &Genesis{Config: params.TestChainConfig} - chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -1813,13 +1979,14 @@ func testSideImport(t *testing.T, numCanonBlocksInSidechain, blocksBetweenCommon // Generate a canonical chain to act as the main dataset engine := ethash.NewFaker() db := rawdb.NewMemoryDatabase() - genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db, trie.NewDatabase(db, newDbConfig(rawdb.HashScheme))) // Generate and import the canonical chain blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 2*DefaultTriesInMemory, nil, true) diskdb := rawdb.NewMemoryDatabase() - (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb) - chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb, trie.NewDatabase(diskdb, newDbConfig(rawdb.HashScheme))) + gspec := &Genesis{Config: params.TestChainConfig} + chain, err := NewBlockChain(diskdb, nil, gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -1886,15 +2053,24 @@ func TestPrunedImportSide(t *testing.T) { testSideImport(t, 1, -10) } -func TestInsertKnownHeaders(t *testing.T) { testInsertKnownChainData(t, "headers") } -func TestInsertKnownReceiptChain(t *testing.T) { testInsertKnownChainData(t, "receipts") } -func TestInsertKnownBlocks(t *testing.T) { testInsertKnownChainData(t, "blocks") } +func TestInsertKnownHeaders(t *testing.T) { + testInsertKnownChainData(t, "headers", rawdb.HashScheme) + testInsertKnownChainData(t, "headers", rawdb.PathScheme) +} +func TestInsertKnownReceiptChain(t *testing.T) { + testInsertKnownChainData(t, "receipts", rawdb.HashScheme) + testInsertKnownChainData(t, "receipts", rawdb.PathScheme) +} +func TestInsertKnownBlocks(t *testing.T) { + testInsertKnownChainData(t, "blocks", rawdb.HashScheme) + testInsertKnownChainData(t, "blocks", rawdb.PathScheme) +} -func testInsertKnownChainData(t *testing.T, typ string) { +func testInsertKnownChainData(t *testing.T, typ string, scheme string) { engine := ethash.NewFaker() db := rawdb.NewMemoryDatabase() - genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db, trie.NewDatabase(db, nil)) blocks, receipts := GenerateChain(params.TestChainConfig, genesis, engine, db, 32, func(i int, b *BlockGen) { b.SetCoinbase(common.Address{1}) }, true) // A longer chain but total difficulty is lower. @@ -1914,10 +2090,11 @@ func testInsertKnownChainData(t *testing.T, typ string) { if err != nil { t.Fatalf("failed to create temp freezer db: %v", err) } - (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(chaindb) + (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(chaindb, trie.NewDatabase(chaindb, nil)) defer os.RemoveAll(dir) + gspec := &Genesis{Config: params.TestChainConfig} - chain, err := NewBlockChain(chaindb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(chaindb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -2012,11 +2189,11 @@ func testInsertKnownChainData(t *testing.T, typ string) { } // getLongAndShortChains returns two chains: A is longer, B is heavier. -func getLongAndShortChains() (bc *BlockChain, longChain []*types.Block, heavyChain []*types.Block, err error) { +func getLongAndShortChains(scheme string) (bc *BlockChain, longChain []*types.Block, heavyChain []*types.Block, err error) { // Generate a canonical chain to act as the main dataset engine := ethash.NewFaker() db := rawdb.NewMemoryDatabase() - genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db, trie.NewDatabase(db, nil)) // Generate and import the canonical chain, // Offset the time, to keep the difficulty low @@ -2024,9 +2201,10 @@ func getLongAndShortChains() (bc *BlockChain, longChain []*types.Block, heavyCha b.SetCoinbase(common.Address{1}) }, true) diskdb := rawdb.NewMemoryDatabase() - (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb) + (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb, trie.NewDatabase(diskdb, nil)) + gspec := &Genesis{Config: params.TestChainConfig} - chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { return nil, nil, nil, fmt.Errorf("failed to create tester chain: %v", err) } @@ -2072,7 +2250,12 @@ func getLongAndShortChains() (bc *BlockChain, longChain []*types.Block, heavyCha // 3. Then there should be no canon mapping for the block at height X // 4. The forked block should still be retrievable by hash func TestReorgToShorterRemovesCanonMapping(t *testing.T) { - chain, canonblocks, sideblocks, err := getLongAndShortChains() + testReorgToShorterRemovesCanonMapping(t, rawdb.HashScheme) + testReorgToShorterRemovesCanonMapping(t, rawdb.PathScheme) +} + +func testReorgToShorterRemovesCanonMapping(t *testing.T, scheme string) { + chain, canonblocks, sideblocks, err := getLongAndShortChains(scheme) if err != nil { t.Fatal(err) } @@ -2108,7 +2291,12 @@ func TestReorgToShorterRemovesCanonMapping(t *testing.T) { // as TestReorgToShorterRemovesCanonMapping, but applied on headerchain // imports -- that is, for fast sync func TestReorgToShorterRemovesCanonMappingHeaderChain(t *testing.T) { - chain, canonblocks, sideblocks, err := getLongAndShortChains() + testReorgToShorterRemovesCanonMappingHeaderChain(t, rawdb.HashScheme) + testReorgToShorterRemovesCanonMappingHeaderChain(t, rawdb.PathScheme) +} + +func testReorgToShorterRemovesCanonMappingHeaderChain(t *testing.T, scheme string) { + chain, canonblocks, sideblocks, err := getLongAndShortChains(scheme) if err != nil { t.Fatal(err) } @@ -2160,7 +2348,8 @@ func TestTransactionIndices(t *testing.T) { Alloc: GenesisAlloc{address: {Balance: funds}}, BaseFee: big.NewInt(params.InitialBaseFee), } - genesis = gspec.MustCommit(gendb) + triedb = trie.NewDatabase(gendb, nil) + genesis = gspec.MustCommit(gendb, triedb) signer = types.LatestSigner(gspec.Config) ) height := uint64(128) @@ -2215,11 +2404,11 @@ func TestTransactionIndices(t *testing.T) { if err != nil { t.Fatalf("failed to create temp freezer db: %v", err) } - gspec.MustCommit(ancientDb) + gspec.MustCommit(ancientDb, trie.NewDatabase(ancientDb, nil)) // Import all blocks into ancient db l := uint64(0) - chain, err := NewBlockChain(ancientDb, nil, params.TestChainConfig, ethash.NewFaker(), vm.Config{}, nil, &l) + chain, err := NewBlockChain(ancientDb, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, &l) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -2243,8 +2432,8 @@ func TestTransactionIndices(t *testing.T) { if err != nil { t.Fatalf("failed to create temp freezer db: %v", err) } - gspec.MustCommit(ancientDb) - chain, err = NewBlockChain(ancientDb, nil, params.TestChainConfig, ethash.NewFaker(), vm.Config{}, nil, &l) + gspec.MustCommit(ancientDb, trie.NewDatabase(ancientDb, nil)) + chain, err = NewBlockChain(ancientDb, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, &l) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -2263,12 +2452,12 @@ func TestTransactionIndices(t *testing.T) { if err != nil { t.Fatalf("failed to create temp freezer db: %v", err) } - gspec.MustCommit(ancientDb) + gspec.MustCommit(ancientDb, trie.NewDatabase(ancientDb, nil)) limit = []uint64{0, 64 /* drop stale */, 32 /* shorten history */, 64 /* extend history */, 0 /* restore all */} tails := []uint64{0, 67 /* 130 - 64 + 1 */, 100 /* 131 - 32 + 1 */, 69 /* 132 - 64 + 1 */, 0} for i, l := range limit { - chain, err = NewBlockChain(ancientDb, nil, params.TestChainConfig, ethash.NewFaker(), vm.Config{}, nil, &l) + chain, err = NewBlockChain(ancientDb, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, &l) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -2278,8 +2467,12 @@ func TestTransactionIndices(t *testing.T) { chain.Stop() } } - func TestSkipStaleTxIndicesInFastSync(t *testing.T) { + testSkipStaleTxIndicesInFastSync(t, rawdb.HashScheme) + testSkipStaleTxIndicesInFastSync(t, rawdb.PathScheme) +} + +func testSkipStaleTxIndicesInFastSync(t *testing.T, scheme string) { // Configure and generate a sample block chain var ( gendb = rawdb.NewMemoryDatabase() @@ -2287,7 +2480,7 @@ func TestSkipStaleTxIndicesInFastSync(t *testing.T) { address = crypto.PubkeyToAddress(key.PublicKey) funds = big.NewInt(100000000000000000) gspec = &Genesis{Config: params.TestChainConfig, Alloc: GenesisAlloc{address: {Balance: funds}}} - genesis = gspec.MustCommit(gendb) + genesis = gspec.MustCommit(gendb, trie.NewDatabase(gendb, nil)) signer = types.LatestSigner(gspec.Config) ) height := uint64(128) @@ -2342,11 +2535,14 @@ func TestSkipStaleTxIndicesInFastSync(t *testing.T) { if err != nil { t.Fatalf("failed to create temp freezer db: %v", err) } - gspec.MustCommit(ancientDb) + triedb := trie.NewDatabase(ancientDb, nil) + gspec.MustCommit(ancientDb, triedb) + + defer ancientDb.Close() // Import all blocks into ancient db, only HEAD-32 indices are kept. l := uint64(32) - chain, err := NewBlockChain(ancientDb, nil, params.TestChainConfig, ethash.NewFaker(), vm.Config{}, nil, &l) + chain, err := NewBlockChain(ancientDb, DefaultCacheConfigWithScheme(scheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, &l) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -2387,7 +2583,7 @@ func benchmarkLargeNumberOfValueToNonexisting(b *testing.B, numTxs, numBlocks in // Generate the original common chain segment and the two competing forks engine := ethash.NewFaker() db := rawdb.NewMemoryDatabase() - genesis := gspec.MustCommit(db) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, newDbConfig(rawdb.HashScheme))) blockGenerator := func(i int, block *BlockGen) { block.SetCoinbase(common.Address{1}) @@ -2408,9 +2604,9 @@ func benchmarkLargeNumberOfValueToNonexisting(b *testing.B, numTxs, numBlocks in for i := 0; i < b.N; i++ { // Import the shared chain and the original canonical one diskdb := rawdb.NewMemoryDatabase() - gspec.MustCommit(diskdb) + gspec.MustCommit(diskdb, trie.NewDatabase(diskdb, newDbConfig(rawdb.HashScheme))) - chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(diskdb, nil, &gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { b.Fatalf("failed to create tester chain: %v", err) } @@ -2483,16 +2679,22 @@ func BenchmarkBlockChain_1x1000Executions(b *testing.B) { // 2. Downloader starts to sync again // 3. The blocks fetched are all known and canonical blocks func TestSideImportPrunedBlocks(t *testing.T) { + testSideImportPrunedBlocks(t, rawdb.HashScheme) + testSideImportPrunedBlocks(t, rawdb.PathScheme) +} + +func testSideImportPrunedBlocks(t *testing.T, scheme string) { // Generate a canonical chain to act as the main dataset engine := ethash.NewFaker() db := rawdb.NewMemoryDatabase() - genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + genesis := (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db, trie.NewDatabase(db, nil)) // Generate and import the canonical chain blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 2*DefaultTriesInMemory, nil, true) diskdb := rawdb.NewMemoryDatabase() - (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb) - chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(diskdb, trie.NewDatabase(diskdb, nil)) + gspec := &Genesis{Config: params.TestChainConfig} + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -2500,14 +2702,20 @@ func TestSideImportPrunedBlocks(t *testing.T) { t.Fatalf("block %d: failed to insert into chain: %v", n, err) } - lastPrunedIndex := len(blocks) - DefaultTriesInMemory - 1 + // In path-based trie database implementation, it will keep 128 diff + 1 disk + // layers, totally 129 latest states available. In hash-based it's 128. + states := DefaultTriesInMemory + if scheme == rawdb.PathScheme { + states = DefaultTriesInMemory + 1 + } + lastPrunedIndex := len(blocks) - states - 1 lastPrunedBlock := blocks[lastPrunedIndex] // Verify pruning of lastPrunedBlock if chain.HasBlockAndState(lastPrunedBlock.Hash(), lastPrunedBlock.NumberU64()) { t.Errorf("Block %d not pruned", lastPrunedBlock.NumberU64()) } - firstNonPrunedBlock := blocks[len(blocks)-DefaultTriesInMemory] + firstNonPrunedBlock := blocks[len(blocks)-states] // Verify firstNonPrunedBlock is not pruned if !chain.HasBlockAndState(firstNonPrunedBlock.Hash(), firstNonPrunedBlock.NumberU64()) { t.Errorf("Block %d pruned", firstNonPrunedBlock.NumberU64()) @@ -2529,6 +2737,11 @@ func TestSideImportPrunedBlocks(t *testing.T) { // each transaction, so this works ok. The rework accumulated writes in memory // first, but the journal wiped the entire state object on create-revert. func TestDeleteCreateRevert(t *testing.T) { + testDeleteCreateRevert(t, rawdb.HashScheme) + testDeleteCreateRevert(t, rawdb.PathScheme) +} + +func testDeleteCreateRevert(t *testing.T, scheme string) { var ( aa = common.HexToAddress("0x000000000000000000000000000000000000aaaa") bb = common.HexToAddress("0x000000000000000000000000000000000000bbbb") @@ -2568,7 +2781,8 @@ func TestDeleteCreateRevert(t *testing.T) { }, }, } - genesis = gspec.MustCommit(db) + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) ) blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 1, func(i int, b *BlockGen) { @@ -2584,9 +2798,9 @@ func TestDeleteCreateRevert(t *testing.T) { }, true) // Import the canonical chain diskdb := rawdb.NewMemoryDatabase() - gspec.MustCommit(diskdb) + gspec.MustCommit(diskdb, trie.NewDatabase(diskdb, nil)) - chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -2603,6 +2817,11 @@ func TestDeleteCreateRevert(t *testing.T) { // Expected outcome is that _all_ slots are cleared from A, due to the selfdestruct, // and then the new slots exist func TestDeleteRecreateSlots(t *testing.T) { + testDeleteRecreateSlots(t, rawdb.HashScheme) + testDeleteRecreateSlots(t, rawdb.PathScheme) +} + +func testDeleteRecreateSlots(t *testing.T, scheme string) { var ( // Generate a canonical chain to act as the main dataset engine = ethash.NewFaker() @@ -2683,7 +2902,8 @@ func TestDeleteRecreateSlots(t *testing.T) { }, }, } - genesis := gspec.MustCommit(db) + triedb := trie.NewDatabase(db, nil) + genesis := gspec.MustCommit(db, triedb) blocks, _ := GenerateChain(&chainConfig, genesis, engine, db, 1, func(i int, b *BlockGen) { b.SetCoinbase(common.Address{1}) @@ -2697,9 +2917,9 @@ func TestDeleteRecreateSlots(t *testing.T) { b.AddTx(tx) }, true) // Import the canonical chain + db.Close() diskdb := rawdb.NewMemoryDatabase() - gspec.MustCommit(diskdb) - chain, err := NewBlockChain(diskdb, nil, &chainConfig, engine, vm.Config{ + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{ Debug: true, Tracer: logger.NewJSONLogger(nil, os.Stdout), }, nil, nil) @@ -2732,6 +2952,11 @@ func TestDeleteRecreateSlots(t *testing.T) { // regular value-transfer // Expected outcome is that _all_ slots are cleared from A func TestDeleteRecreateAccount(t *testing.T) { + testDeleteRecreateAccount(t, rawdb.HashScheme) + testDeleteRecreateAccount(t, rawdb.PathScheme) +} + +func testDeleteRecreateAccount(t *testing.T, scheme string) { var ( // Generate a canonical chain to act as the main dataset engine = ethash.NewFaker() @@ -2765,7 +2990,7 @@ func TestDeleteRecreateAccount(t *testing.T) { }, }, } - genesis := gspec.MustCommit(db) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) blocks, _ := GenerateChain(&chainConfig, genesis, engine, db, 1, func(i int, b *BlockGen) { b.SetCoinbase(common.Address{1}) @@ -2780,8 +3005,8 @@ func TestDeleteRecreateAccount(t *testing.T) { }, true) // Import the canonical chain diskdb := rawdb.NewMemoryDatabase() - gspec.MustCommit(diskdb) - chain, err := NewBlockChain(diskdb, nil, &chainConfig, engine, vm.Config{ + gspec.MustCommit(diskdb, trie.NewDatabase(diskdb, nil)) + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{ Debug: true, Tracer: logger.NewJSONLogger(nil, os.Stdout), }, nil, nil) @@ -2810,6 +3035,11 @@ func TestDeleteRecreateAccount(t *testing.T) { // Expected outcome is that _all_ slots are cleared from A, due to the selfdestruct, // and then the new slots exist func TestDeleteRecreateSlotsAcrossManyBlocks(t *testing.T) { + testDeleteRecreateSlotsAcrossManyBlocks(t, rawdb.HashScheme) + testDeleteRecreateSlotsAcrossManyBlocks(t, rawdb.PathScheme) +} + +func testDeleteRecreateSlotsAcrossManyBlocks(t *testing.T, scheme string) { var ( // Generate a canonical chain to act as the main dataset engine = ethash.NewFaker() @@ -2891,7 +3121,9 @@ func TestDeleteRecreateSlotsAcrossManyBlocks(t *testing.T) { }, }, } - genesis := gspec.MustCommit(db) + triedb := trie.NewDatabase(db, nil) + genesis := gspec.MustCommit(db, triedb) + var nonce uint64 type expectation struct { @@ -2955,8 +3187,10 @@ func TestDeleteRecreateSlotsAcrossManyBlocks(t *testing.T) { }, true) // Import the canonical chain diskdb := rawdb.NewMemoryDatabase() - gspec.MustCommit(diskdb) - chain, err := NewBlockChain(diskdb, nil, &chainConfig, engine, vm.Config{ + triedb = trie.NewDatabase(diskdb, nil) + gspec.MustCommit(diskdb, triedb) + + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{ //Debug: true, //Tracer: vm.NewJSONLogger(nil, os.Stdout), }, nil, nil) @@ -3013,6 +3247,11 @@ func TestDeleteRecreateSlotsAcrossManyBlocks(t *testing.T) { // We need to either roll back the snapDestructs, or not place it into snapDestructs // in the first place. func TestInitThenFailCreateContract(t *testing.T) { + testInitThenFailCreateContract(t, rawdb.HashScheme) + testInitThenFailCreateContract(t, rawdb.PathScheme) +} + +func testInitThenFailCreateContract(t *testing.T, scheme string) { var ( // Generate a canonical chain to act as the main dataset engine = ethash.NewFaker() @@ -3075,7 +3314,9 @@ func TestInitThenFailCreateContract(t *testing.T) { }, }, } - genesis := gspec.MustCommit(db) + triedb := trie.NewDatabase(db, nil) + genesis := gspec.MustCommit(db, triedb) + nonce := uint64(0) blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 4, func(i int, b *BlockGen) { b.SetCoinbase(common.Address{1}) @@ -3088,8 +3329,9 @@ func TestInitThenFailCreateContract(t *testing.T) { // Import the canonical chain diskdb := rawdb.NewMemoryDatabase() - gspec.MustCommit(diskdb) - chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{ + triedb = trie.NewDatabase(diskdb, nil) + gspec.MustCommit(diskdb, triedb) + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{ //Debug: true, //Tracer: vm.NewJSONLogger(nil, os.Stdout), }, nil, nil) @@ -3125,6 +3367,11 @@ func TestInitThenFailCreateContract(t *testing.T) { // checking that the gas usage of a hot SLOAD and a cold SLOAD are calculated // correctly. func TestEIP2718Transition(t *testing.T) { + testEIP2718Transition(t, rawdb.HashScheme) + testEIP2718Transition(t, rawdb.PathScheme) +} + +func testEIP2718Transition(t *testing.T, scheme string) { var ( aa = common.HexToAddress("0x000000000000000000000000000000000000aaaa") @@ -3153,7 +3400,8 @@ func TestEIP2718Transition(t *testing.T) { }, }, } - genesis = gspec.MustCommit(db) + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) ) blocks, _ := GenerateChain(gspec.Config, genesis, engine, db, 1, func(i int, b *BlockGen) { @@ -3177,9 +3425,9 @@ func TestEIP2718Transition(t *testing.T) { // Import the canonical chain diskdb := rawdb.NewMemoryDatabase() - gspec.MustCommit(diskdb) + gspec.MustCommit(diskdb, trie.NewDatabase(diskdb, nil)) - chain, err := NewBlockChain(diskdb, nil, gspec.Config, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -3208,6 +3456,11 @@ func TestEIP2718Transition(t *testing.T) { // gasFeeCap - gasTipCap < baseFee. // 6. Legacy transaction behave as expected (e.g. gasPrice = gasFeeCap = gasTipCap). func TestEIP1559Transition(t *testing.T) { + testEIP1559Transition(t, rawdb.HashScheme) + testEIP1559Transition(t, rawdb.PathScheme) +} + +func testEIP1559Transition(t *testing.T, scheme string) { var ( aa = common.HexToAddress("0x000000000000000000000000000000000000aaaa") @@ -3243,7 +3496,9 @@ func TestEIP1559Transition(t *testing.T) { gspec.Config.BerlinBlock = common.Big0 gspec.Config.LondonBlock = common.Big0 - genesis := gspec.MustCommit(db) + triedb := trie.NewDatabase(db, nil) + genesis := gspec.MustCommit(db, triedb) + signer := types.LatestSigner(gspec.Config) blocks, _ := GenerateChain(gspec.Config, genesis, engine, db, 1, func(i int, b *BlockGen) { @@ -3272,9 +3527,10 @@ func TestEIP1559Transition(t *testing.T) { }, true) diskdb := rawdb.NewMemoryDatabase() - gspec.MustCommit(diskdb) + triedb = trie.NewDatabase(diskdb, nil) + gspec.MustCommit(diskdb, triedb) - chain, err := NewBlockChain(diskdb, nil, gspec.Config, engine, vm.Config{}, nil, nil) + chain, err := NewBlockChain(diskdb, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -3352,6 +3608,11 @@ func TestEIP1559Transition(t *testing.T) { } func TestSponsoredTxTransitionBeforeMiko(t *testing.T) { + testSponsoredTxTransitionBeforeMiko(t, rawdb.HashScheme) + testSponsoredTxTransitionBeforeMiko(t, rawdb.PathScheme) +} + +func testSponsoredTxTransitionBeforeMiko(t *testing.T, scheme string) { var chainConfig params.ChainConfig chainConfig.HomesteadBlock = common.Big0 @@ -3375,8 +3636,8 @@ func TestSponsoredTxTransitionBeforeMiko(t *testing.T) { gspec := &Genesis{ Config: &chainConfig, } - genesis := gspec.MustCommit(db) - chain, err := NewBlockChain(db, nil, &chainConfig, engine, vm.Config{}, nil, nil) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) + chain, err := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create blockchain, err %s", err) } @@ -3418,6 +3679,10 @@ func TestSponsoredTxTransitionBeforeMiko(t *testing.T) { } func TestSponsoredTxTransition(t *testing.T) { + testSponsoredTxTransition(t, rawdb.HashScheme) + testSponsoredTxTransition(t, rawdb.PathScheme) +} +func testSponsoredTxTransition(t *testing.T, scheme string) { var chainConfig params.ChainConfig chainConfig.HomesteadBlock = common.Big0 @@ -3454,8 +3719,8 @@ func TestSponsoredTxTransition(t *testing.T) { adminAddr: {Balance: math.BigPow(10, 18)}, }, } - genesis := gspec.MustCommit(db) - chain, err := NewBlockChain(db, nil, &chainConfig, engine, vm.Config{}, nil, nil) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) + chain, err := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create blockchain, err %s", err) } @@ -3567,6 +3832,7 @@ func TestSponsoredTxTransition(t *testing.T) { // 5. Sender does not have sufficient fund gasFee := new(big.Int).Mul(innerTx.GasFeeCap, new(big.Int).SetUint64(innerTx.Gas)) + genesis = gspec.MustCommit(db, trie.NewDatabase(db, nil)) blocks, _ := GenerateChain(&chainConfig, genesis, engine, db, 1, func(i int, bg *BlockGen) { tx, err := types.SignTx(types.NewTransaction(0, payerAddr, gasFee, params.TxGas, bg.header.BaseFee, nil), mikoSigner, adminKey) if err != nil { @@ -3673,6 +3939,11 @@ func TestSponsoredTxTransition(t *testing.T) { // TestTransientStorageReset ensures the transient storage is wiped correctly // between transactions. func TestTransientStorageReset(t *testing.T) { + testTransientStorageReset(t, rawdb.HashScheme) + testTransientStorageReset(t, rawdb.PathScheme) +} + +func testTransientStorageReset(t *testing.T, scheme string) { var ( engine = ethash.NewFaker() key, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") @@ -3744,7 +4015,7 @@ func TestTransientStorageReset(t *testing.T) { }) // Initialize the blockchain with 1153 enabled. - chain, err := NewBlockChain(db, nil, gspec.Config, engine, vmConfig, nil, nil) + chain, err := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vmConfig, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -3765,6 +4036,11 @@ func TestTransientStorageReset(t *testing.T) { } func TestEIP3651(t *testing.T) { + testEIP3651(t, rawdb.HashScheme) + testEIP3651(t, rawdb.PathScheme) +} + +func testEIP3651(t *testing.T, scheme string) { var ( aa = common.HexToAddress("0x000000000000000000000000000000000000aaaa") bb = common.HexToAddress("0x000000000000000000000000000000000000bbbb") @@ -3835,7 +4111,7 @@ func TestEIP3651(t *testing.T) { b.AddTx(tx) }) - chain, err := NewBlockChain(db, nil, gspec.Config, engine, vm.Config{Tracer: logger.NewMarkdownLogger(&logger.Config{}, os.Stderr)}, nil, nil) + chain, err := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{Tracer: logger.NewMarkdownLogger(&logger.Config{}, os.Stderr)}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } @@ -3924,6 +4200,11 @@ func randBlob() (*kzg4844.Blob, *kzg4844.Commitment, *kzg4844.Proof) { } func TestInsertChainWithSidecars(t *testing.T) { + testInsertChainWithSidecars(t, rawdb.HashScheme) + testInsertChainWithSidecars(t, rawdb.PathScheme) +} + +func testInsertChainWithSidecars(t *testing.T, scheme string) { privateKey, _ := crypto.GenerateKey() address := crypto.PubkeyToAddress(privateKey.PublicKey) chainConfig := params.TestChainConfig @@ -3938,8 +4219,10 @@ func TestInsertChainWithSidecars(t *testing.T) { }, }, } - genesis := gspec.MustCommit(db) - chain, err := NewBlockChain(db, nil, chainConfig, engine, vm.Config{}, nil, nil) + triedb := trie.NewDatabase(db, nil) + gspec.MustCommit(db, triedb) + + chain, err := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create blockchain, err %s", err) } @@ -3989,6 +4272,7 @@ func TestInsertChainWithSidecars(t *testing.T) { t.Fatal(err) } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) blocks, _ := GenerateChain(chainConfig, genesis, engine, db, 1, func(i int, bg *BlockGen) { bg.AddTx(tx1) bg.AddTx(tx2) @@ -4023,8 +4307,11 @@ func TestInsertChainWithSidecars(t *testing.T) { // Reset database db = rawdb.NewMemoryDatabase() - gspec.MustCommit(db) - chain, err = NewBlockChain(db, nil, chainConfig, engine, vm.Config{}, nil, nil) + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) + + chain.triedb.Close() + chain, err = NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create blockchain, err %s", err) } @@ -4047,8 +4334,11 @@ func TestInsertChainWithSidecars(t *testing.T) { // Reset database db = rawdb.NewMemoryDatabase() - gspec.MustCommit(db) - chain, err = NewBlockChain(db, nil, chainConfig, engine, vm.Config{}, nil, nil) + triedb = trie.NewDatabase(db, nil) + + chain.triedb.Close() + genesis = gspec.MustCommit(db, triedb) + chain, err = NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create blockchain, err %s", err) } @@ -4095,8 +4385,11 @@ func TestInsertChainWithSidecars(t *testing.T) { // Reset database db = rawdb.NewMemoryDatabase() - gspec.MustCommit(db) - chain, err = NewBlockChain(db, nil, chainConfig, engine, vm.Config{}, nil, nil) + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) + + chain.triedb.Close() + chain, err = NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create blockchain, err %s", err) } @@ -4157,8 +4450,8 @@ func TestInsertChainWithSidecars(t *testing.T) { // Reset database db := rawdb.NewMemoryDatabase() - gspec.MustCommit(db) - chain, err := NewBlockChain(db, nil, chainConfig, engine, vm.Config{}, nil, nil) + gspec.MustCommit(db, trie.NewDatabase(db, nil)) + chain, err := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create blockchain, err %s", err) } @@ -4191,8 +4484,9 @@ func TestInsertChainWithSidecars(t *testing.T) { // Reset database db = rawdb.NewMemoryDatabase() - gspec.MustCommit(db) - chain, err = NewBlockChain(db, nil, chainConfig, engine, vm.Config{}, nil, nil) + triedb = trie.NewDatabase(db, nil) + genesis = gspec.MustCommit(db, triedb) + chain, err = NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create blockchain, err %s", err) } @@ -4224,6 +4518,11 @@ func TestInsertChainWithSidecars(t *testing.T) { } func TestSidecarsPruning(t *testing.T) { + testSidecarsPruning(t, rawdb.HashScheme) + testSidecarsPruning(t, rawdb.PathScheme) +} + +func testSidecarsPruning(t *testing.T, scheme string) { var prunePeriod uint64 = 1000 privateKey, _ := crypto.GenerateKey() address := crypto.PubkeyToAddress(privateKey.PublicKey) @@ -4239,8 +4538,8 @@ func TestSidecarsPruning(t *testing.T) { }, }, } - genesis := gspec.MustCommit(db) - chain, err := NewBlockChain(db, nil, chainConfig, engine, vm.Config{}, nil, nil) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) + chain, err := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create blockchain, err %s", err) } @@ -4314,3 +4613,113 @@ func TestSidecarsPruning(t *testing.T) { } } } + +func TestDeleteThenCreate(t *testing.T) { + var ( + engine = ethash.NewFaker() + key, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") + address = crypto.PubkeyToAddress(key.PublicKey) + factoryAddr = crypto.CreateAddress(address, 0) + funds = big.NewInt(1000000000000000) + ) + /* + contract Factory { + function deploy(bytes memory code) public { + address addr; + assembly { + addr := create2(0, add(code, 0x20), mload(code), 0) + if iszero(extcodesize(addr)) { + revert(0, 0) + } + } + } + } + */ + factoryBIN := common.Hex2Bytes("608060405234801561001057600080fd5b50610241806100206000396000f3fe608060405234801561001057600080fd5b506004361061002a5760003560e01c80627743601461002f575b600080fd5b610049600480360381019061004491906100d8565b61004b565b005b6000808251602084016000f59050803b61006457600080fd5b5050565b600061007b61007684610146565b610121565b905082815260208101848484011115610097576100966101eb565b5b6100a2848285610177565b509392505050565b600082601f8301126100bf576100be6101e6565b5b81356100cf848260208601610068565b91505092915050565b6000602082840312156100ee576100ed6101f5565b5b600082013567ffffffffffffffff81111561010c5761010b6101f0565b5b610118848285016100aa565b91505092915050565b600061012b61013c565b90506101378282610186565b919050565b6000604051905090565b600067ffffffffffffffff821115610161576101606101b7565b5b61016a826101fa565b9050602081019050919050565b82818337600083830152505050565b61018f826101fa565b810181811067ffffffffffffffff821117156101ae576101ad6101b7565b5b80604052505050565b7f4e487b7100000000000000000000000000000000000000000000000000000000600052604160045260246000fd5b600080fd5b600080fd5b600080fd5b600080fd5b6000601f19601f830116905091905056fea2646970667358221220ea8b35ed310d03b6b3deef166941140b4d9e90ea2c92f6b41eb441daf49a59c364736f6c63430008070033") + + /* + contract C { + uint256 value; + constructor() { + value = 100; + } + function destruct() public payable { + selfdestruct(payable(msg.sender)); + } + receive() payable external {} + } + */ + contractABI := common.Hex2Bytes("6080604052348015600f57600080fd5b5060646000819055506081806100266000396000f3fe608060405260043610601f5760003560e01c80632b68b9c614602a576025565b36602557005b600080fd5b60306032565b005b3373ffffffffffffffffffffffffffffffffffffffff16fffea2646970667358221220ab749f5ed1fcb87bda03a74d476af3f074bba24d57cb5a355e8162062ad9a4e664736f6c63430008070033") + contractAddr := crypto.CreateAddress2(factoryAddr, [32]byte{}, crypto.Keccak256(contractABI)) + + gspec := &Genesis{ + Config: params.TestChainConfig, + Alloc: GenesisAlloc{ + address: {Balance: funds}, + }, + } + nonce := uint64(0) + signer := types.HomesteadSigner{} + _, blocks, _ := GenerateChainWithGenesis(gspec, engine, 2, func(i int, b *BlockGen) { + fee := big.NewInt(1) + if b.header.BaseFee != nil { + fee = b.header.BaseFee + } + b.SetCoinbase(common.Address{1}) + + // Block 1 + if i == 0 { + tx, _ := types.SignNewTx(key, signer, &types.LegacyTx{ + Nonce: nonce, + GasPrice: new(big.Int).Set(fee), + Gas: 500000, + Data: factoryBIN, + }) + nonce++ + b.AddTx(tx) + + data := common.Hex2Bytes("00774360000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000a76080604052348015600f57600080fd5b5060646000819055506081806100266000396000f3fe608060405260043610601f5760003560e01c80632b68b9c614602a576025565b36602557005b600080fd5b60306032565b005b3373ffffffffffffffffffffffffffffffffffffffff16fffea2646970667358221220ab749f5ed1fcb87bda03a74d476af3f074bba24d57cb5a355e8162062ad9a4e664736f6c6343000807003300000000000000000000000000000000000000000000000000") + tx, _ = types.SignNewTx(key, signer, &types.LegacyTx{ + Nonce: nonce, + GasPrice: new(big.Int).Set(fee), + Gas: 500000, + To: &factoryAddr, + Data: data, + }) + b.AddTx(tx) + nonce++ + } else { + // Block 2 + tx, _ := types.SignNewTx(key, signer, &types.LegacyTx{ + Nonce: nonce, + GasPrice: new(big.Int).Set(fee), + Gas: 500000, + To: &contractAddr, + Data: common.Hex2Bytes("2b68b9c6"), // destruct + }) + nonce++ + b.AddTx(tx) + + data := common.Hex2Bytes("00774360000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000a76080604052348015600f57600080fd5b5060646000819055506081806100266000396000f3fe608060405260043610601f5760003560e01c80632b68b9c614602a576025565b36602557005b600080fd5b60306032565b005b3373ffffffffffffffffffffffffffffffffffffffff16fffea2646970667358221220ab749f5ed1fcb87bda03a74d476af3f074bba24d57cb5a355e8162062ad9a4e664736f6c6343000807003300000000000000000000000000000000000000000000000000") + tx, _ = types.SignNewTx(key, signer, &types.LegacyTx{ + Nonce: nonce, + GasPrice: new(big.Int).Set(fee), + Gas: 500000, + To: &factoryAddr, // re-creation + Data: data, + }) + b.AddTx(tx) + nonce++ + } + }) + // Import the canonical chain + chain, err := NewBlockChain(rawdb.NewMemoryDatabase(), nil, gspec, nil, engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("failed to create tester chain: %v", err) + } + for _, block := range blocks { + if _, err := chain.InsertChain([]*types.Block{block}, nil); err != nil { + t.Fatalf("block %d: failed to insert into chain: %v", block.NumberU64(), err) + } + } +} diff --git a/core/chain_makers.go b/core/chain_makers.go index 1bb2079f04..08f82b157d 100644 --- a/core/chain_makers.go +++ b/core/chain_makers.go @@ -21,6 +21,7 @@ import ( "math/big" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/consensus" @@ -264,7 +265,7 @@ func generateChain( } blocks, receipts := make(types.Blocks, n), make([]types.Receipts, n) chainreader := newFakeChainReader(config, db) - genblock := func(i int, parent *types.Block, statedb *state.StateDB) (*types.Block, types.Receipts) { + genblock := func(i int, parent *types.Block, triedb *trie.Database, statedb *state.StateDB) (*types.Block, types.Receipts) { b := &BlockGen{i: i, chain: blocks, parent: parent, statedb: statedb, config: config, engine: engine} b.header = makeHeader(chainreader, parent, statedb, b.engine) @@ -318,12 +319,12 @@ func generateChain( } // Write state changes to db - root, err := statedb.Commit(config.IsEIP158(b.header.Number)) + root, err := statedb.Commit(b.header.Number.Uint64(), config.IsEIP158(b.header.Number)) if err != nil { panic(fmt.Sprintf("state write error: %v", err)) } if flushDisk { - if err := statedb.Database().TrieDB().Commit(root, false, nil); err != nil { + if err := triedb.Commit(root, false); err != nil { panic(fmt.Sprintf("trie write error: %v", err)) } } @@ -331,14 +332,16 @@ func generateChain( } return nil, nil } - // Create an ephemeral database - database := state.NewDatabase(db) + // Forcibly use hash-based state scheme for retaining all nodes in disk. + triedb := trie.NewDatabase(db, trie.HashDefaults) + defer triedb.Close() + for i := 0; i < n; i++ { - statedb, err := state.New(parent.Root(), database, nil) + statedb, err := state.New(parent.Root(), state.NewDatabaseWithNodeDB(db, triedb), nil) if err != nil { panic(err) } - block, receipt := genblock(i, parent, statedb) + block, receipt := genblock(i, parent, triedb, statedb) // Prepare Blob receipt var blobGasPrice *big.Int @@ -362,11 +365,11 @@ func generateChain( // then generate chain on top. func GenerateChainWithGenesis(genesis *Genesis, engine consensus.Engine, n int, gen func(int, *BlockGen)) (ethdb.Database, []*types.Block, []types.Receipts) { db := rawdb.NewMemoryDatabase() - _, err := genesis.Commit(db) - if err != nil { - panic(err) - } - blocks, receipts := GenerateChain(genesis.Config, genesis.ToBlock(db), engine, db, n, gen, true) + triedb := trie.NewDatabase(db, trie.HashDefaults) + + defer triedb.Close() + genesis.MustCommit(db, triedb) + blocks, receipts := GenerateChain(genesis.Config, genesis.ToBlock(), engine, db, n, gen, true) return db, blocks, receipts } diff --git a/core/chain_makers_test.go b/core/chain_makers_test.go index 968c0f069e..adf2476bc3 100644 --- a/core/chain_makers_test.go +++ b/core/chain_makers_test.go @@ -26,6 +26,7 @@ import ( "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) func ExampleGenerateChain() { @@ -44,7 +45,7 @@ func ExampleGenerateChain() { Config: ¶ms.ChainConfig{HomesteadBlock: new(big.Int)}, Alloc: GenesisAlloc{addr1: {Balance: big.NewInt(1000000)}}, } - genesis := gspec.MustCommit(db) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) // This call generates a chain of 5 blocks. The function runs for // each block and adds different features to gen based on the @@ -79,7 +80,7 @@ func ExampleGenerateChain() { }, true) // Import the chain. This runs all block validation rules. - blockchain, _ := NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + blockchain, _ := NewBlockChain(db, DefaultCacheConfigWithScheme(rawdb.HashScheme), gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer blockchain.Stop() if i, err := blockchain.InsertChain(chain, nil); err != nil { diff --git a/core/dao_test.go b/core/dao_test.go index 08faacd388..0c862a9c11 100644 --- a/core/dao_test.go +++ b/core/dao_test.go @@ -30,32 +30,39 @@ import ( // blocks based on their extradata fields. func TestDAOForkRangeExtradata(t *testing.T) { forkBlock := big.NewInt(32) + chainConfig := *params.NonActivatedConfig + chainConfig.HomesteadBlock = big.NewInt(0) // Generate a common prefix for both pro-forkers and non-forkers - db := rawdb.NewMemoryDatabase() - gspec := &Genesis{BaseFee: big.NewInt(params.InitialBaseFee)} - genesis := gspec.MustCommit(db) - prefix, _ := GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), db, int(forkBlock.Int64()-1), func(i int, gen *BlockGen) {}, true) + gspec := &Genesis{ + BaseFee: big.NewInt(params.InitialBaseFee), + Config: &chainConfig, + } + db, prefix, _ := GenerateChainWithGenesis(gspec, ethash.NewFaker(), int(forkBlock.Int64()-1), func(i int, gen *BlockGen) {}) // Create the concurrent, conflicting two nodes proDb := rawdb.NewMemoryDatabase() - gspec.MustCommit(proDb) - - proConf := *params.TestChainConfig + proConf := *params.NonActivatedConfig + proConf.HomesteadBlock = big.NewInt(0) proConf.DAOForkBlock = forkBlock proConf.DAOForkSupport = true - - proBc, _ := NewBlockChain(proDb, nil, &proConf, ethash.NewFaker(), vm.Config{}, nil, nil) + progspec := &Genesis{ + BaseFee: big.NewInt(params.InitialBaseFee), + Config: &proConf, + } + proBc, _ := NewBlockChain(proDb, nil, progspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer proBc.Stop() conDb := rawdb.NewMemoryDatabase() - gspec.MustCommit(conDb) - - conConf := *params.TestChainConfig + conConf := *params.NonActivatedConfig + conConf.HomesteadBlock = big.NewInt(0) conConf.DAOForkBlock = forkBlock conConf.DAOForkSupport = false - - conBc, _ := NewBlockChain(conDb, nil, &conConf, ethash.NewFaker(), vm.Config{}, nil, nil) + congspec := &Genesis{ + BaseFee: big.NewInt(params.InitialBaseFee), + Config: &conConf, + } + conBc, _ := NewBlockChain(conDb, nil, congspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer conBc.Stop() if _, err := proBc.InsertChain(prefix, nil); err != nil { @@ -67,10 +74,7 @@ func TestDAOForkRangeExtradata(t *testing.T) { // Try to expand both pro-fork and non-fork chains iteratively with other camp's blocks for i := int64(0); i < params.DAOForkExtraRange.Int64(); i++ { // Create a pro-fork block, and try to feed into the no-fork chain - db = rawdb.NewMemoryDatabase() - gspec.MustCommit(db) - bc, _ := NewBlockChain(db, nil, &conConf, ethash.NewFaker(), vm.Config{}, nil, nil) - defer bc.Stop() + bc, _ := NewBlockChain(rawdb.NewMemoryDatabase(), nil, congspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) blocks := conBc.GetBlocksFromHash(conBc.CurrentBlock().Hash(), int(conBc.CurrentBlock().NumberU64())) for j := 0; j < len(blocks)/2; j++ { @@ -79,7 +83,7 @@ func TestDAOForkRangeExtradata(t *testing.T) { if _, err := bc.InsertChain(blocks, nil); err != nil { t.Fatalf("failed to import contra-fork chain for expansion: %v", err) } - if err := bc.stateCache.TrieDB().Commit(bc.CurrentHeader().Root, true, nil); err != nil { + if err := bc.triedb.Commit(bc.CurrentHeader().Root, true); err != nil { t.Fatalf("failed to commit contra-fork head for expansion: %v", err) } blocks, _ = GenerateChain(&proConf, conBc.CurrentBlock(), ethash.NewFaker(), db, 1, func(i int, gen *BlockGen) {}, true) @@ -92,9 +96,8 @@ func TestDAOForkRangeExtradata(t *testing.T) { t.Fatalf("contra-fork chain didn't accepted no-fork block: %v", err) } // Create a no-fork block, and try to feed into the pro-fork chain - db = rawdb.NewMemoryDatabase() - gspec.MustCommit(db) - bc, _ = NewBlockChain(db, nil, &proConf, ethash.NewFaker(), vm.Config{}, nil, nil) + bc, _ = NewBlockChain(rawdb.NewMemoryDatabase(), nil, progspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) + defer bc.Stop() blocks = proBc.GetBlocksFromHash(proBc.CurrentBlock().Hash(), int(proBc.CurrentBlock().NumberU64())) @@ -104,7 +107,7 @@ func TestDAOForkRangeExtradata(t *testing.T) { if _, err := bc.InsertChain(blocks, nil); err != nil { t.Fatalf("failed to import pro-fork chain for expansion: %v", err) } - if err := bc.stateCache.TrieDB().Commit(bc.CurrentHeader().Root, true, nil); err != nil { + if err := bc.triedb.Commit(bc.CurrentHeader().Root, true); err != nil { t.Fatalf("failed to commit pro-fork head for expansion: %v", err) } blocks, _ = GenerateChain(&conConf, proBc.CurrentBlock(), ethash.NewFaker(), db, 1, func(i int, gen *BlockGen) {}, true) @@ -118,9 +121,7 @@ func TestDAOForkRangeExtradata(t *testing.T) { } } // Verify that contra-forkers accept pro-fork extra-datas after forking finishes - db = rawdb.NewMemoryDatabase() - gspec.MustCommit(db) - bc, _ := NewBlockChain(db, nil, &conConf, ethash.NewFaker(), vm.Config{}, nil, nil) + bc, _ := NewBlockChain(rawdb.NewMemoryDatabase(), nil, congspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer bc.Stop() blocks := conBc.GetBlocksFromHash(conBc.CurrentBlock().Hash(), int(conBc.CurrentBlock().NumberU64())) @@ -130,7 +131,7 @@ func TestDAOForkRangeExtradata(t *testing.T) { if _, err := bc.InsertChain(blocks, nil); err != nil { t.Fatalf("failed to import contra-fork chain for expansion: %v", err) } - if err := bc.stateCache.TrieDB().Commit(bc.CurrentHeader().Root, true, nil); err != nil { + if err := bc.triedb.Commit(bc.CurrentHeader().Root, true); err != nil { t.Fatalf("failed to commit contra-fork head for expansion: %v", err) } blocks, _ = GenerateChain(&proConf, conBc.CurrentBlock(), ethash.NewFaker(), db, 1, func(i int, gen *BlockGen) {}, true) @@ -138,9 +139,7 @@ func TestDAOForkRangeExtradata(t *testing.T) { t.Fatalf("contra-fork chain didn't accept pro-fork block post-fork: %v", err) } // Verify that pro-forkers accept contra-fork extra-datas after forking finishes - db = rawdb.NewMemoryDatabase() - gspec.MustCommit(db) - bc, _ = NewBlockChain(db, nil, &proConf, ethash.NewFaker(), vm.Config{}, nil, nil) + bc, _ = NewBlockChain(rawdb.NewMemoryDatabase(), nil, progspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) defer bc.Stop() blocks = proBc.GetBlocksFromHash(proBc.CurrentBlock().Hash(), int(proBc.CurrentBlock().NumberU64())) @@ -150,7 +149,7 @@ func TestDAOForkRangeExtradata(t *testing.T) { if _, err := bc.InsertChain(blocks, nil); err != nil { t.Fatalf("failed to import pro-fork chain for expansion: %v", err) } - if err := bc.stateCache.TrieDB().Commit(bc.CurrentHeader().Root, true, nil); err != nil { + if err := bc.triedb.Commit(bc.CurrentHeader().Root, true); err != nil { t.Fatalf("failed to commit pro-fork head for expansion: %v", err) } blocks, _ = GenerateChain(&conConf, proBc.CurrentBlock(), ethash.NewFaker(), db, 1, func(i int, gen *BlockGen) {}, true) diff --git a/core/error.go b/core/error.go index ca1724b9a6..a36db136e9 100644 --- a/core/error.go +++ b/core/error.go @@ -1,3 +1,4 @@ + // Copyright 2014 The go-ethereum Authors // This file is part of the go-ethereum library. // diff --git a/core/genesis.go b/core/genesis.go index 11cc039d38..d1c8ff94fe 100644 --- a/core/genesis.go +++ b/core/genesis.go @@ -80,6 +80,100 @@ func (ga *GenesisAlloc) UnmarshalJSON(data []byte) error { return nil } +// deriveHash computes the state root according to the genesis specification. +func (ga *GenesisAlloc) deriveHash() (common.Hash, error) { + // Create an ephemeral in-memory database for computing hash, + // all the derived states will be discarded to not pollute disk. + db := state.NewDatabase(rawdb.NewMemoryDatabase()) + statedb, err := state.New(common.Hash{}, db, nil) + if err != nil { + return common.Hash{}, err + } + for addr, account := range *ga { + statedb.AddBalance(addr, account.Balance) + statedb.SetCode(addr, account.Code) + statedb.SetNonce(addr, account.Nonce) + for key, value := range account.Storage { + statedb.SetState(addr, key, value) + } + } + return statedb.Commit(0, false) +} + +// flush is very similar with deriveHash, but the main difference is +// all the generated states will be persisted into the given database. +// Also, the genesis state specification will be flushed as well. +func (ga *GenesisAlloc) flush(db ethdb.Database, triedb *trie.Database) error { + statedb, err := state.New(common.Hash{}, state.NewDatabaseWithNodeDB(db, triedb), nil) + if err != nil { + return err + } + for addr, account := range *ga { + statedb.AddBalance(addr, account.Balance) + statedb.SetCode(addr, account.Code) + statedb.SetNonce(addr, account.Nonce) + for key, value := range account.Storage { + statedb.SetState(addr, key, value) + } + } + // Commit current state, return the root hash. + root, err := statedb.Commit(0, false) + if err != nil { + return err + } + // Commit newly generated states into disk if it's not empty. + if root != types.EmptyRootHash { + if err := triedb.Commit(root, true); err != nil { + return err + } + } + + // Marshal the genesis state specification and persist. + blob, err := json.Marshal(ga) + if err != nil { + return err + } + rawdb.WriteGenesisStateSpec(db, root, blob) + return nil +} + +// CommitGenesisState loads the stored genesis state with the given block +// hash and commits it into the provided database handler. +func CommitGenesisState(db ethdb.Database, triedb *trie.Database, hash common.Hash) error { + var alloc GenesisAlloc + blob := rawdb.ReadGenesisStateSpec(db, hash) + if len(blob) != 0 { + if err := alloc.UnmarshalJSON(blob); err != nil { + return err + } + } else { + // Genesis allocation is missing and there are several possibilities: + // the node is legacy which doesn't persist the genesis allocation or + // the persisted allocation is just lost. + // - supported networks(mainnet, testnets), recover with defined allocations + // - private network, can't recover + var genesis *Genesis + switch hash { + case params.MainnetGenesisHash: + genesis = DefaultGenesisBlock() + case params.RopstenGenesisHash: + genesis = DefaultRopstenGenesisBlock() + case params.RinkebyGenesisHash: + genesis = DefaultRinkebyGenesisBlock() + case params.GoerliGenesisHash: + genesis = DefaultGoerliGenesisBlock() + case params.SepoliaGenesisHash: + genesis = DefaultSepoliaGenesisBlock() + } + if genesis != nil { + alloc = genesis.Alloc + } else { + return errors.New("not found") + } + } + return alloc.flush(db, triedb) +} + // GenesisAccount is an account in the state of the genesis block. type GenesisAccount struct { Code []byte `json:"code,omitempty"` @@ -154,14 +248,15 @@ func (e *GenesisMismatchError) Error() string { // error is a *params.ConfigCompatError and the new, unwritten config is returned. // // The returned chain configuration is never nil. -func SetupGenesisBlock(db ethdb.Database, genesis *Genesis, overrideGenesis bool) (*params.ChainConfig, common.Hash, error) { - return SetupGenesisBlockWithOverride(db, genesis, nil, overrideGenesis) +func SetupGenesisBlock(db ethdb.Database, triedb *trie.Database, genesis *Genesis, overrideGenesis bool) (*params.ChainConfig, common.Hash, error) { + return SetupGenesisBlockWithOverride(db, triedb, genesis, nil, overrideGenesis) } -func SetupGenesisBlockWithOverride(db ethdb.Database, genesis *Genesis, overrideArrowGlacier *big.Int, forceOverrideChainConfig bool) (*params.ChainConfig, common.Hash, error) { +func SetupGenesisBlockWithOverride(db ethdb.Database, triedb *trie.Database, genesis *Genesis, overrideArrowGlacier *big.Int, forceOverrideChainConfig bool) (*params.ChainConfig, common.Hash, error) { if genesis != nil && genesis.Config == nil { return params.AllEthashProtocolChanges, common.Hash{}, errGenesisNoConfig } + // Just commit the new block if there is no stored genesis block. stored := rawdb.ReadCanonicalHash(db, 0) if (stored == common.Hash{}) { @@ -171,25 +266,27 @@ func SetupGenesisBlockWithOverride(db ethdb.Database, genesis *Genesis, override } else { log.Info("Writing custom genesis block") } - block, err := genesis.Commit(db) + block, err := genesis.Commit(db, triedb) if err != nil { return genesis.Config, common.Hash{}, err } return genesis.Config, block.Hash(), nil } - // We have the genesis block in database(perhaps in ancient database) - // but the corresponding state is missing. + // The genesis block is present(perhaps in ancient database) while the + // state database is not initialized yet. It can happen that the node + // is initialized with an external ancient store. Commit genesis state + // in this case. header := rawdb.ReadHeader(db, stored, 0) - if _, err := state.New(header.Root, state.NewDatabaseWithConfig(db, nil), nil); err != nil { + if header.Root != types.EmptyRootHash && !triedb.Initialized(header.Root) { if genesis == nil { genesis = DefaultGenesisBlock() } // Ensure the stored genesis matches with the given one. - hash := genesis.ToBlock(nil).Hash() + hash := genesis.ToBlock().Hash() if hash != stored { return genesis.Config, hash, &GenesisMismatchError{stored, hash} } - block, err := genesis.Commit(db) + block, err := genesis.Commit(db, triedb) if err != nil { return genesis.Config, hash, err } @@ -197,7 +294,7 @@ func SetupGenesisBlockWithOverride(db ethdb.Database, genesis *Genesis, override } // Check whether the genesis block is already written. if genesis != nil { - hash := genesis.ToBlock(nil).Hash() + hash := genesis.ToBlock().Hash() if hash != stored { return genesis.Config, hash, &GenesisMismatchError{stored, hash} } @@ -241,6 +338,26 @@ func SetupGenesisBlockWithOverride(db ethdb.Database, genesis *Genesis, override return newcfg, stored, nil } +// LoadChainConfig loads the stored chain config if the chain config +// is already present in database, otherwise, return the config in the +// provided genesis specification. +func LoadChainConfig(db ethdb.Database, genesis *Genesis) (*params.ChainConfig, error) { + if genesis != nil && genesis.Config == nil { + return params.AllEthashProtocolChanges, errGenesisNoConfig + } + stored := rawdb.ReadCanonicalHash(db, 0) + if (stored != common.Hash{}) { + storedcfg := rawdb.ReadChainConfig(db, stored) + if storedcfg != nil { + return storedcfg, nil + } + } + if genesis == nil { + genesis = DefaultGenesisBlock() + } + return genesis.Config, nil +} + func (g *Genesis) configOrDefault(ghash common.Hash) *params.ChainConfig { switch { case g != nil: @@ -262,25 +379,12 @@ func (g *Genesis) configOrDefault(ghash common.Hash) *params.ChainConfig { } } -// ToBlock creates the genesis block and writes state of a genesis specification -// to the given database (or discards it if nil). -func (g *Genesis) ToBlock(db ethdb.Database) *types.Block { - if db == nil { - db = rawdb.NewMemoryDatabase() - } - statedb, err := state.New(common.Hash{}, state.NewDatabase(db), nil) +// ToBlock returns the genesis block according to genesis specification. +func (g *Genesis) ToBlock() *types.Block { + root, err := g.Alloc.deriveHash() if err != nil { panic(err) } - for addr, account := range g.Alloc { - statedb.AddBalance(addr, account.Balance) - statedb.SetCode(addr, account.Code) - statedb.SetNonce(addr, account.Nonce) - for key, value := range account.Storage { - statedb.SetState(addr, key, value) - } - } - root := statedb.IntermediateRoot(false) head := &types.Header{ Number: new(big.Int).SetUint64(g.Number), Nonce: types.EncodeNonce(g.Nonce), @@ -308,16 +412,13 @@ func (g *Genesis) ToBlock(db ethdb.Database) *types.Block { head.BaseFee = new(big.Int).SetUint64(params.InitialBaseFee) } } - statedb.Commit(false) - statedb.Database().TrieDB().Commit(root, true, nil) - return types.NewBlock(head, nil, nil, nil, trie.NewStackTrie(nil)) } // Commit writes the block and state of a genesis specification to the database. // The block is committed as the canonical head block. -func (g *Genesis) Commit(db ethdb.Database) (*types.Block, error) { - block := g.ToBlock(db) +func (g *Genesis) Commit(db ethdb.Database, triedb *trie.Database) (*types.Block, error) { + block := g.ToBlock() if block.Number().Sign() != 0 { return nil, errors.New("can't commit genesis block with number > 0") } @@ -331,6 +432,12 @@ func (g *Genesis) Commit(db ethdb.Database) (*types.Block, error) { if config.Clique != nil && len(block.Extra()) == 0 { return nil, errors.New("can't start clique chain without signers") } + // All the checks has passed, flush the states derived from the genesis + // specification as well as the specification itself into the provided + // database. + if err := g.Alloc.flush(db, triedb); err != nil { + return nil, err + } rawdb.WriteTd(db, block.Hash(), block.NumberU64(), block.Difficulty()) rawdb.WriteBlock(db, block) rawdb.WriteReceipts(db, block.Hash(), block.NumberU64(), nil) @@ -344,23 +451,14 @@ func (g *Genesis) Commit(db ethdb.Database) (*types.Block, error) { // MustCommit writes the genesis block and state to db, panicking on error. // The block is committed as the canonical head block. -func (g *Genesis) MustCommit(db ethdb.Database) *types.Block { - block, err := g.Commit(db) +func (g *Genesis) MustCommit(db ethdb.Database, triedb *trie.Database) *types.Block { + block, err := g.Commit(db, triedb) if err != nil { panic(err) } return block } -// GenesisBlockForTesting creates and writes a block in which addr has the given wei balance. -func GenesisBlockForTesting(db ethdb.Database, addr common.Address, balance *big.Int) *types.Block { - g := Genesis{ - Alloc: GenesisAlloc{addr: {Balance: balance}}, - BaseFee: big.NewInt(params.InitialBaseFee), - } - return g.MustCommit(db) -} - // DefaultGenesisBlock returns the Ethereum main net genesis block. func DefaultGenesisBlock() *Genesis { return &Genesis{ diff --git a/core/genesis_test.go b/core/genesis_test.go index 078f22ca30..9fb0a988e8 100644 --- a/core/genesis_test.go +++ b/core/genesis_test.go @@ -17,6 +17,7 @@ package core import ( + "encoding/json" "math/big" "reflect" "testing" @@ -28,17 +29,24 @@ import ( "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" ) func TestInvalidCliqueConfig(t *testing.T) { block := DefaultGoerliGenesisBlock() block.ExtraData = []byte{} - if _, err := block.Commit(nil); err == nil { + db := rawdb.NewMemoryDatabase() + if _, err := block.Commit(db, trie.NewDatabase(db, nil)); err == nil { t.Fatal("Expected error on invalid clique config") } } - func TestSetupGenesis(t *testing.T) { + testSetupGenesis(t, rawdb.HashScheme) + testSetupGenesis(t, rawdb.PathScheme) +} + +func testSetupGenesis(t *testing.T, scheme string) { var ( customghash = common.HexToHash("0x89c99d90b79719238d2645c7642f2c9295246e80775b38cfd162b696817fbd50") customg = Genesis{ @@ -60,7 +68,7 @@ func TestSetupGenesis(t *testing.T) { { name: "genesis without ChainConfig", fn: func(db ethdb.Database) (*params.ChainConfig, common.Hash, error) { - return SetupGenesisBlock(db, new(Genesis), false) + return SetupGenesisBlock(db, trie.NewDatabase(db, newDbConfig(scheme)), new(Genesis), false) }, wantErr: errGenesisNoConfig, wantConfig: params.AllEthashProtocolChanges, @@ -68,7 +76,7 @@ func TestSetupGenesis(t *testing.T) { { name: "no block in DB, genesis == nil", fn: func(db ethdb.Database) (*params.ChainConfig, common.Hash, error) { - return SetupGenesisBlock(db, nil, false) + return SetupGenesisBlock(db, trie.NewDatabase(db, newDbConfig(scheme)), nil, false) }, wantHash: params.MainnetGenesisHash, wantConfig: params.MainnetChainConfig, @@ -76,8 +84,9 @@ func TestSetupGenesis(t *testing.T) { { name: "mainnet block in DB, genesis == nil", fn: func(db ethdb.Database) (*params.ChainConfig, common.Hash, error) { - DefaultGenesisBlock().MustCommit(db) - return SetupGenesisBlock(db, nil, false) + tdb := newDbConfig(scheme) + DefaultGenesisBlock().MustCommit(db, trie.NewDatabase(db, tdb)) + return SetupGenesisBlock(db, trie.NewDatabase(db, tdb), nil, false) }, wantHash: params.MainnetGenesisHash, wantConfig: params.MainnetChainConfig, @@ -85,8 +94,9 @@ func TestSetupGenesis(t *testing.T) { { name: "custom block in DB, genesis == nil", fn: func(db ethdb.Database) (*params.ChainConfig, common.Hash, error) { - customg.MustCommit(db) - return SetupGenesisBlock(db, nil, false) + tdb := newDbConfig(scheme) + customg.MustCommit(db, trie.NewDatabase(db, tdb)) + return SetupGenesisBlock(db, trie.NewDatabase(db, tdb), nil, false) }, wantHash: customghash, wantConfig: customg.Config, @@ -94,8 +104,9 @@ func TestSetupGenesis(t *testing.T) { { name: "custom block in DB, genesis == ropsten", fn: func(db ethdb.Database) (*params.ChainConfig, common.Hash, error) { - customg.MustCommit(db) - return SetupGenesisBlock(db, DefaultRopstenGenesisBlock(), false) + tdb := newDbConfig(scheme) + customg.MustCommit(db, trie.NewDatabase(db, tdb)) + return SetupGenesisBlock(db, trie.NewDatabase(db, tdb), DefaultRopstenGenesisBlock(), false) }, wantErr: &GenesisMismatchError{Stored: customghash, New: params.RopstenGenesisHash}, wantHash: params.RopstenGenesisHash, @@ -104,8 +115,9 @@ func TestSetupGenesis(t *testing.T) { { name: "compatible config in DB", fn: func(db ethdb.Database) (*params.ChainConfig, common.Hash, error) { - oldcustomg.MustCommit(db) - return SetupGenesisBlock(db, &customg, false) + tdb := newDbConfig(scheme) + oldcustomg.MustCommit(db, trie.NewDatabase(db, tdb)) + return SetupGenesisBlock(db, trie.NewDatabase(db, tdb), &customg, false) }, wantHash: customghash, wantConfig: customg.Config, @@ -115,16 +127,17 @@ func TestSetupGenesis(t *testing.T) { fn: func(db ethdb.Database) (*params.ChainConfig, common.Hash, error) { // Commit the 'old' genesis block with Homestead transition at #2. // Advance to block #4, past the homestead transition block of customg. - genesis := oldcustomg.MustCommit(db) + tdb := trie.NewDatabase(db, newDbConfig(scheme)) + oldcustomg.MustCommit(db, tdb) - bc, _ := NewBlockChain(db, nil, oldcustomg.Config, ethash.NewFullFaker(), vm.Config{}, nil, nil) + bc, _ := NewBlockChain(db, DefaultCacheConfigWithScheme(scheme), &oldcustomg, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) defer bc.Stop() - blocks, _ := GenerateChain(oldcustomg.Config, genesis, ethash.NewFaker(), db, 4, nil, true) + _, blocks, _ := GenerateChainWithGenesis(&oldcustomg, ethash.NewFaker(), 4, nil) bc.InsertChain(blocks, nil) bc.CurrentBlock() // This should return a compatibility error. - return SetupGenesisBlock(db, &customg, false) + return SetupGenesisBlock(db, tdb, &customg, false) }, wantHash: customghash, wantConfig: customg.Config, @@ -174,11 +187,12 @@ func TestGenesisHashes(t *testing.T) { {DefaultSepoliaGenesisBlock(), params.SepoliaGenesisHash}, } { // Test via MustCommit - if have := c.genesis.MustCommit(rawdb.NewMemoryDatabase()).Hash(); have != c.want { + db := rawdb.NewMemoryDatabase() + if have := c.genesis.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)).Hash(); have != c.want { t.Errorf("case: %d a), want: %s, got: %s", i, c.want.Hex(), have.Hex()) } // Test via ToBlock - if have := c.genesis.ToBlock(nil).Hash(); have != c.want { + if have := c.genesis.ToBlock().Hash(); have != c.want { t.Errorf("case: %d a), want: %s, got: %s", i, c.want.Hex(), have.Hex()) } } @@ -192,11 +206,7 @@ func TestGenesis_Commit(t *testing.T) { } db := rawdb.NewMemoryDatabase() - genesisBlock, err := genesis.Commit(db) - if err != nil { - t.Fatal(err) - } - + genesisBlock := genesis.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) if genesis.Difficulty != nil { t.Fatalf("assumption wrong") } @@ -213,3 +223,41 @@ func TestGenesis_Commit(t *testing.T) { t.Errorf("inequal difficulty; stored: %v, genesisBlock: %v", stored, genesisBlock.Difficulty()) } } + +func TestReadWriteGenesisAlloc(t *testing.T) { + var ( + db = rawdb.NewMemoryDatabase() + alloc = &GenesisAlloc{ + {1}: {Balance: big.NewInt(1), Storage: map[common.Hash]common.Hash{{1}: {1}}}, + {2}: {Balance: big.NewInt(2), Storage: map[common.Hash]common.Hash{{2}: {2}}}, + } + hash, _ = alloc.deriveHash() + ) + blob, _ := json.Marshal(alloc) + rawdb.WriteGenesisStateSpec(db, hash, blob) + + var reload GenesisAlloc + err := reload.UnmarshalJSON(rawdb.ReadGenesisStateSpec(db, hash)) + if err != nil { + t.Fatalf("Failed to load genesis state %v", err) + } + if len(reload) != len(*alloc) { + t.Fatal("Unexpected genesis allocation") + } + for addr, account := range reload { + want, ok := (*alloc)[addr] + if !ok { + t.Fatal("Account is not found") + } + if !reflect.DeepEqual(want, account) { + t.Fatal("Unexpected account") + } + } +} + +func newDbConfig(scheme string) *trie.Config { + if scheme == rawdb.HashScheme { + return trie.HashDefaults + } + return &trie.Config{PathDB: pathdb.Defaults} +} diff --git a/core/headerchain_test.go b/core/headerchain_test.go index f3e40b6213..2163926fb5 100644 --- a/core/headerchain_test.go +++ b/core/headerchain_test.go @@ -29,6 +29,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) func verifyUnbrokenCanonchain(hc *HeaderChain) error { @@ -71,7 +72,8 @@ func testInsert(t *testing.T, hc *HeaderChain, chain []*types.Header, wantStatus func TestHeaderInsertion(t *testing.T) { var ( db = rawdb.NewMemoryDatabase() - genesis = (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + gspec = &Genesis{Config: params.TestChainConfig, BaseFee: big.NewInt(params.InitialBaseFee)} + genesis = gspec.MustCommit(db, trie.NewDatabase(db, nil)) ) hc, err := NewHeaderChain(db, params.AllEthashProtocolChanges, ethash.NewFaker(), func() bool { return false }) diff --git a/core/rawdb/accessors_chain.go b/core/rawdb/accessors_chain.go index 50877048ce..4f1994bba3 100644 --- a/core/rawdb/accessors_chain.go +++ b/core/rawdb/accessors_chain.go @@ -37,8 +37,8 @@ import ( // ReadCanonicalHash retrieves the hash assigned to a canonical block number. func ReadCanonicalHash(db ethdb.Reader, number uint64) common.Hash { var data []byte - db.ReadAncients(func(reader ethdb.AncientReader) error { - data, _ = reader.Ancient(freezerHashTable, number) + db.ReadAncients(func(reader ethdb.AncientReaderOp) error { + data, _ = reader.Ancient(chainFreezerHashTable, number) if len(data) == 0 { // Get it by hash from leveldb data, _ = db.Get(headerHashKey(number)) @@ -301,11 +301,11 @@ func WriteFastTxLookupLimit(db ethdb.KeyValueWriter, number uint64) { // ReadHeaderRLP retrieves a block header in its raw RLP database encoding. func ReadHeaderRLP(db ethdb.Reader, hash common.Hash, number uint64) rlp.RawValue { var data []byte - db.ReadAncients(func(reader ethdb.AncientReader) error { + db.ReadAncients(func(reader ethdb.AncientReaderOp) error { // First try to look up the data in ancient database. Extra hash // comparison is necessary since ancient database only maintains // the canonical data. - data, _ = reader.Ancient(freezerHeaderTable, number) + data, _ = reader.Ancient(chainFreezerHeaderTable, number) if len(data) > 0 && crypto.Keccak256Hash(data) == hash { return nil } @@ -380,8 +380,8 @@ func deleteHeaderWithoutNumber(db ethdb.KeyValueWriter, hash common.Hash, number // isCanon is an internal utility method, to check whether the given number/hash // is part of the ancient (canon) set. -func isCanon(reader ethdb.AncientReader, number uint64, hash common.Hash) bool { - h, err := reader.Ancient(freezerHashTable, number) +func isCanon(reader ethdb.AncientReaderOp, number uint64, hash common.Hash) bool { + h, err := reader.Ancient(chainFreezerHashTable, number) if err != nil { return false } @@ -394,10 +394,10 @@ func ReadBodyRLP(db ethdb.Reader, hash common.Hash, number uint64) rlp.RawValue // comparison is necessary since ancient database only maintains // the canonical data. var data []byte - db.ReadAncients(func(reader ethdb.AncientReader) error { + db.ReadAncients(func(reader ethdb.AncientReaderOp) error { // Check if the data is in ancients if isCanon(reader, number, hash) { - data, _ = reader.Ancient(freezerBodiesTable, number) + data, _ = reader.Ancient(chainFreezerBodiesTable, number) return nil } // If not, try reading from leveldb @@ -411,8 +411,8 @@ func ReadBodyRLP(db ethdb.Reader, hash common.Hash, number uint64) rlp.RawValue // block at number, in RLP encoding. func ReadCanonicalBodyRLP(db ethdb.Reader, number uint64) rlp.RawValue { var data []byte - db.ReadAncients(func(reader ethdb.AncientReader) error { - data, _ = reader.Ancient(freezerBodiesTable, number) + db.ReadAncients(func(reader ethdb.AncientReaderOp) error { + data, _ = reader.Ancient(chainFreezerBodiesTable, number) if len(data) > 0 { return nil } @@ -502,10 +502,10 @@ func WriteInternalTransactions(db ethdb.KeyValueWriter, hash common.Hash, intern // ReadTdRLP retrieves a block's total difficulty corresponding to the hash in RLP encoding. func ReadTdRLP(db ethdb.Reader, hash common.Hash, number uint64) rlp.RawValue { var data []byte - db.ReadAncients(func(reader ethdb.AncientReader) error { + db.ReadAncients(func(reader ethdb.AncientReaderOp) error { // Check if the data is in ancients if isCanon(reader, number, hash) { - data, _ = reader.Ancient(freezerDifficultyTable, number) + data, _ = reader.Ancient(chainFreezerDifficultyTable, number) return nil } // If not, try reading from leveldb @@ -562,10 +562,10 @@ func HasReceipts(db ethdb.Reader, hash common.Hash, number uint64) bool { // ReadReceiptsRLP retrieves all the transaction receipts belonging to a block in RLP encoding. func ReadReceiptsRLP(db ethdb.Reader, hash common.Hash, number uint64) rlp.RawValue { var data []byte - db.ReadAncients(func(reader ethdb.AncientReader) error { + db.ReadAncients(func(reader ethdb.AncientReaderOp) error { // Check if the data is in ancients if isCanon(reader, number, hash) { - data, _ = reader.Ancient(freezerReceiptTable, number) + data, _ = reader.Ancient(chainFreezerReceiptTable, number) return nil } // If not, try reading from leveldb @@ -802,19 +802,19 @@ func WriteAncientBlocks(db ethdb.AncientWriter, blocks []*types.Block, receipts func writeAncientBlock(op ethdb.AncientWriteOp, block *types.Block, header *types.Header, receipts []*types.ReceiptForStorage, td *big.Int) error { num := block.NumberU64() - if err := op.AppendRaw(freezerHashTable, num, block.Hash().Bytes()); err != nil { + if err := op.AppendRaw(chainFreezerHashTable, num, block.Hash().Bytes()); err != nil { return fmt.Errorf("can't add block %d hash: %v", num, err) } - if err := op.Append(freezerHeaderTable, num, header); err != nil { + if err := op.Append(chainFreezerHeaderTable, num, header); err != nil { return fmt.Errorf("can't append block header %d: %v", num, err) } - if err := op.Append(freezerBodiesTable, num, block.Body()); err != nil { + if err := op.Append(chainFreezerBodiesTable, num, block.Body()); err != nil { return fmt.Errorf("can't append block body %d: %v", num, err) } - if err := op.Append(freezerReceiptTable, num, receipts); err != nil { + if err := op.Append(chainFreezerReceiptTable, num, receipts); err != nil { return fmt.Errorf("can't append block %d receipts: %v", num, err) } - if err := op.Append(freezerDifficultyTable, num, td); err != nil { + if err := op.Append(chainFreezerDifficultyTable, num, td); err != nil { return fmt.Errorf("can't append block %d total difficulty: %v", num, err) } return nil diff --git a/core/rawdb/accessors_chain_test.go b/core/rawdb/accessors_chain_test.go index 7168fa6969..fa0a0b03b0 100644 --- a/core/rawdb/accessors_chain_test.go +++ b/core/rawdb/accessors_chain_test.go @@ -88,7 +88,7 @@ func TestBodyStorage(t *testing.T) { WriteBody(db, hash, 0, body) if entry := ReadBody(db, hash, 0); entry == nil { t.Fatalf("Stored body not found") - } else if types.DeriveSha(types.Transactions(entry.Transactions), newHasher()) != types.DeriveSha(types.Transactions(body.Transactions), newHasher()) || types.CalcUncleHash(entry.Uncles) != types.CalcUncleHash(body.Uncles) { + } else if types.DeriveSha(types.Transactions(entry.Transactions), newTestHasher()) != types.DeriveSha(types.Transactions(body.Transactions), newTestHasher()) || types.CalcUncleHash(entry.Uncles) != types.CalcUncleHash(body.Uncles) { t.Fatalf("Retrieved body mismatch: have %v, want %v", entry, body) } if entry := ReadBodyRLP(db, hash, 0); entry == nil { @@ -142,7 +142,7 @@ func TestBlockStorage(t *testing.T) { } if entry := ReadBody(db, block.Hash(), block.NumberU64()); entry == nil { t.Fatalf("Stored body not found") - } else if types.DeriveSha(types.Transactions(entry.Transactions), newHasher()) != types.DeriveSha(block.Transactions(), newHasher()) || types.CalcUncleHash(entry.Uncles) != types.CalcUncleHash(block.Uncles()) { + } else if types.DeriveSha(types.Transactions(entry.Transactions), newTestHasher()) != types.DeriveSha(block.Transactions(), newTestHasher()) || types.CalcUncleHash(entry.Uncles) != types.CalcUncleHash(block.Uncles()) { t.Fatalf("Retrieved body mismatch: have %v, want %v", entry, block.Body()) } // Delete the block and verify the execution diff --git a/core/rawdb/accessors_indexes_test.go b/core/rawdb/accessors_indexes_test.go index 4734e986e2..dcb474c180 100644 --- a/core/rawdb/accessors_indexes_test.go +++ b/core/rawdb/accessors_indexes_test.go @@ -37,7 +37,7 @@ type testHasher struct { hasher hash.Hash } -func newHasher() *testHasher { +func newTestHasher() *testHasher { return &testHasher{hasher: sha3.NewLegacyKeccak256()} } @@ -99,7 +99,7 @@ func TestLookupStorage(t *testing.T) { tx3 := types.NewTransaction(3, common.BytesToAddress([]byte{0x33}), big.NewInt(333), 3333, big.NewInt(33333), []byte{0x33, 0x33, 0x33}) txs := []*types.Transaction{tx1, tx2, tx3} - block := types.NewBlock(&types.Header{Number: big.NewInt(314)}, txs, nil, nil, newHasher()) + block := types.NewBlock(&types.Header{Number: big.NewInt(314)}, txs, nil, nil, newTestHasher()) // Check that no transactions entries are in a pristine database for i, tx := range txs { diff --git a/core/rawdb/accessors_metadata.go b/core/rawdb/accessors_metadata.go index cd85a0a8a9..b2dadea145 100644 --- a/core/rawdb/accessors_metadata.go +++ b/core/rawdb/accessors_metadata.go @@ -81,6 +81,20 @@ func WriteChainConfig(db ethdb.KeyValueWriter, hash common.Hash, cfg *params.Cha } } +// ReadGenesisStateSpec retrieves the genesis state specification based on the +// given genesis hash. +func ReadGenesisStateSpec(db ethdb.KeyValueReader, hash common.Hash) []byte { + data, _ := db.Get(genesisStateSpecKey(hash)) + return data +} + +// WriteGenesisStateSpec writes the genesis state specification into the disk. +func WriteGenesisStateSpec(db ethdb.KeyValueWriter, hash common.Hash, data []byte) { + if err := db.Put(genesisStateSpecKey(hash), data); err != nil { + log.Crit("Failed to store genesis state", "err", err) + } +} + // crashList is a list of unclean-shutdown-markers, for rlp-encoding to the // database type crashList struct { diff --git a/core/rawdb/accessors_state.go b/core/rawdb/accessors_state.go index eb35804f41..894db97e76 100644 --- a/core/rawdb/accessors_state.go +++ b/core/rawdb/accessors_state.go @@ -17,6 +17,8 @@ package rawdb import ( + "encoding/binary" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" @@ -68,22 +70,179 @@ func DeleteCode(db ethdb.KeyValueWriter, hash common.Hash) { } } -// ReadTrieNode retrieves the trie node of the provided hash. -func ReadTrieNode(db ethdb.KeyValueReader, hash common.Hash) []byte { - data, _ := db.Get(hash.Bytes()) +/* Function below support Path base state trie scheme */ + +// ReadStateId retrieves the state id with the provided state root. (Return pointer can detect that Statid is valid or not, nil is invalid) +func ReadStateID(db ethdb.KeyValueReader, root common.Hash) *uint64 { + data, err := db.Get(stateIDKey(root)) + if err != nil || len(data) == 0 { + return nil + } + number := binary.BigEndian.Uint64(data) + return &number +} + +// WriteStateID writes the provided state lookup to database. +func WriteStateID(db ethdb.KeyValueWriter, root common.Hash, id uint64) { + var buff [8]byte + // Convert from uint64 to 8 bytes BigEndian id -> buff + binary.BigEndian.PutUint64(buff[:], id) + // Store the state id for root + if err := db.Put(stateIDKey(root), buff[:]); err != nil { + log.Crit("Failed to store state id", "err", err) + } +} + +func DeleteStateID(db ethdb.KeyValueWriter, root common.Hash) { + if err := db.Delete(stateIDKey(root)); err != nil { + log.Crit("Failed to delete state id", "err", err) + } +} + +// ReadPersistentStateID retrievies the id of persistent state from the database. +func ReadPersistentStateID(db ethdb.KeyValueReader) uint64 { + data, _ := db.Get(persistentStateIDKey) + + if len(data) != 8 { // 8 bytes + return 0 + } + return binary.BigEndian.Uint64(data) +} + +// WritePersistentStateID writes the provided id of persistent state to the database. +func WritePersistentStateID(db ethdb.KeyValueWriter, number uint64) { + if err := db.Put(persistentStateIDKey, encodeBlockNumber(number)); err != nil { + log.Crit("Failed to store persistent state id", "err", err) + } +} + +// Read Trie Journal retrieves in-memory trie nodes of layers saved at +// the last shutdown. +func ReadTrieJournal(db ethdb.KeyValueReader) []byte { + data, _ := db.Get(trieJournalKey) return data } -// WriteTrieNode writes the provided trie node database. -func WriteTrieNode(db ethdb.KeyValueWriter, hash common.Hash, node []byte) { - if err := db.Put(hash.Bytes(), node); err != nil { - log.Crit("Failed to store trie node", "err", err) +// WriteTrieJournal stores the serialized in-memory trie nodes of layers to save at +// shutdown. +func WriteTrieJournal(db ethdb.KeyValueWriter, journal []byte) { + if err := db.Put(trieJournalKey, journal); err != nil { + log.Crit("Failed to store tries journal", "err", err) } } -// DeleteTrieNode deletes the specified trie node from the database. -func DeleteTrieNode(db ethdb.KeyValueWriter, hash common.Hash) { - if err := db.Delete(hash.Bytes()); err != nil { - log.Crit("Failed to delete trie node", "err", err) +// DeleteTrieJournal deletes the serialized in-memory trie nodes of layers saved at +// the last shutdown. +func DeleteTrieJournal(db ethdb.KeyValueWriter) { + if err := db.Delete(trieJournalKey); err != nil { + log.Crit("Failed to remove tries journal", "err", err) } } + +/* Ancients */ + +// ReadStateHistoryMeta retrieves the metadata corresponding to the specified +// state history. Compute the position of state history in freezer by minus +// one since the id of first state history starts from one(zero for initial +// state). +func ReadStateHistoryMeta(db ethdb.AncientReaderOp, id uint64) []byte { + blob, err := db.Ancient(stateHistoryMeta, id-1) + if err != nil { + return nil + } + return blob +} + +// ReadStateHistoryMetaList retrieves a batch of meta objects with the specified +// start position and count. Compute the position of state history in freezer by +// minus one since the id of first state history starts from one(zero for initial +// state). +func ReadStateHistoryMetaList(db ethdb.AncientReaderOp, start uint64, count uint64) ([][]byte, error) { + return db.AncientRange(stateHistoryMeta, start-1, count, 0) +} + +// ReadStateAccountIndex retrieves the state root corresponding to the specified +// state history. Compute the position of state history in freezer by minus one +// since the id of first state history starts from one(zero for initial state). +func ReadStateAccountIndex(db ethdb.AncientReaderOp, id uint64) []byte { + blob, err := db.Ancient(stateHistoryAccountIndex, id-1) + if err != nil { + return nil + } + return blob +} + +// ReadStateStorageIndex retrieves the state root corresponding to the specified +// state history. Compute the position of state history in freezer by minus one +// since the id of first state history starts from one(zero for initial state). +func ReadStateStorageIndex(db ethdb.AncientReaderOp, id uint64) []byte { + blob, err := db.Ancient(stateHistoryStorageIndex, id-1) + if err != nil { + return nil + } + return blob +} + +// ReadStateAccountHistory retrieves the state root corresponding to the specified +// state history. Compute the position of state history in freezer by minus one +// since the id of first state history starts from one(zero for initial state). +func ReadStateAccountHistory(db ethdb.AncientReaderOp, id uint64) []byte { + blob, err := db.Ancient(stateHistoryAccountData, id-1) + if err != nil { + return nil + } + return blob +} + +// ReadStateStorageHistory retrieves the state root corresponding to the specified +// state history. Compute the position of state history in freezer by minus one +// since the id of first state history starts from one(zero for initial state). +func ReadStateStorageHistory(db ethdb.AncientReaderOp, id uint64) []byte { + blob, err := db.Ancient(stateHistoryStorageData, id-1) + if err != nil { + return nil + } + return blob +} + +// ReadStateHistory retrieves the state history from database with provided id. +// Compute the position of state history in freezer by minus one since the id +// of first state history starts from one(zero for initial state). +// Returns meta, account and storage (index, data). +func ReadStateHistory(db ethdb.AncientReaderOp, id uint64) ([]byte, []byte, []byte, []byte, []byte, error) { + meta, err := db.Ancient(stateHistoryMeta, id-1) + if err != nil { + return nil, nil, nil, nil, nil, err + } + accountIndex, err := db.Ancient(stateHistoryAccountIndex, id-1) + if err != nil { + return nil, nil, nil, nil, nil, err + } + storageIndex, err := db.Ancient(stateHistoryStorageIndex, id-1) + if err != nil { + return nil, nil, nil, nil, nil, err + } + accountData, err := db.Ancient(stateHistoryAccountData, id-1) + if err != nil { + return nil, nil, nil, nil, nil, err + } + storageData, err := db.Ancient(stateHistoryStorageData, id-1) + if err != nil { + return nil, nil, nil, nil, nil, err + } + return meta, accountIndex, storageIndex, accountData, storageData, nil +} + +// WriteStateHistory writes the provided state history to database. Compute the +// position of state history in freezer by minus one since the id of first state +// history starts from one(zero for initial state). +func WriteStateHistory(db ethdb.AncientWriter, id uint64, meta []byte, accountIndex []byte, storageIndex []byte, accounts []byte, storages []byte) { + db.ModifyAncients(func(op ethdb.AncientWriteOp) error { + op.AppendRaw(stateHistoryMeta, id-1, meta) + op.AppendRaw(stateHistoryAccountIndex, id-1, accountIndex) + op.AppendRaw(stateHistoryStorageIndex, id-1, storageIndex) + op.AppendRaw(stateHistoryAccountData, id-1, accounts) + op.AppendRaw(stateHistoryStorageData, id-1, storages) + return nil + }) +} diff --git a/core/rawdb/accessors_sync.go b/core/rawdb/accessors_sync.go new file mode 100644 index 0000000000..54e5967d81 --- /dev/null +++ b/core/rawdb/accessors_sync.go @@ -0,0 +1,44 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package rawdb + +import ( + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +const ( + StateSyncUnknown = uint8(0) // flags the state snap sync is unknown + StateSyncRunning = uint8(1) // flags the state snap sync is not completed yet + StateSyncFinished = uint8(2) // flags the state snap sync is completed +) + +// ReadSnapSyncStatusFlag retrieves the state snap sync status flag. +func ReadSnapSyncStatusFlag(db ethdb.KeyValueReader) uint8 { + blob, err := db.Get(snapSyncStatusFlagKey) + if err != nil || len(blob) != 1 { + return StateSyncUnknown + } + return blob[0] +} + +// WriteSnapSyncStatusFlag stores the state snap sync status flag into database. +func WriteSnapSyncStatusFlag(db ethdb.KeyValueWriter, flag uint8) { + if err := db.Put(snapSyncStatusFlagKey, []byte{flag}); err != nil { + log.Crit("Failed to store sync status flag", "err", err) + } +} diff --git a/core/rawdb/accessors_trie.go b/core/rawdb/accessors_trie.go new file mode 100644 index 0000000000..869991a388 --- /dev/null +++ b/core/rawdb/accessors_trie.go @@ -0,0 +1,314 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package rawdb + +import ( + "fmt" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "golang.org/x/crypto/sha3" +) + +// HashScheme is the legacy hash-based state scheme with which trie nodes are +// stored in the disk with node hash as the database key. The advantage of this +// scheme is that different versions of trie nodes can be stored in disk, which +// is very beneficial for constructing archive nodes. The drawback is it will +// store different trie nodes on the same path to different locations on the disk +// with no data locality, and it's unfriendly for designing state pruning. +// +// Now this scheme is still kept for backward compatibility, and it will be used +// for archive node and some other tries(e.g. light trie). +const HashScheme = "hash" + +// PathScheme is the new path-based state scheme with which trie nodes are stored +// in the disk with node path as the database key. This scheme will only store one +// version of state data in the disk, which means that the state pruning operation +// is native. At the same time, this scheme will put adjacent trie nodes in the same +// area of the disk with good data locality property. But this scheme needs to rely +// on extra state diffs to survive deep reorg. +const PathScheme = "path" + +// hasher is used to compute the sha256 hash of the provided data. +type hasher struct{ sha crypto.KeccakState } + +var hasherPool = sync.Pool{ + New: func() interface{} { return &hasher{sha: sha3.NewLegacyKeccak256().(crypto.KeccakState)} }, +} + +func newHasher() *hasher { + return hasherPool.Get().(*hasher) +} + +func (h *hasher) hash(data []byte) common.Hash { + return crypto.HashData(h.sha, data) +} +func (h *hasher) release() { + hasherPool.Put(h) +} + +// ReadAccountTrieNode retrieves the account trie node and the associated node +// hash with the specified node path. If it's empty, return empty hash. +func ReadAccountTrieNode(db ethdb.KeyValueReader, path []byte) ([]byte, common.Hash) { + data, err := db.Get(accountTrieNodeKey(path)) + if err != nil { + return nil, common.Hash{} + } + h := newHasher() + defer h.release() + return data, h.hash(data) +} + +// HasAccountTrieNode checks the account trie node presence with the specified +// node path and the associated node hash. +func HasAccountTrieNode(db ethdb.KeyValueReader, path []byte, hash common.Hash) bool { + data, err := db.Get(accountTrieNodeKey(path)) + if err != nil { + return false + } + h := newHasher() + defer h.release() + return h.hash(data) == hash +} + +// ExistsAccountTrieNode checks the presence of the account trie node with the +// specified node path, regardless of the node hash. +func ExistsAccountTrieNode(db ethdb.KeyValueReader, path []byte) bool { + has, err := db.Has(accountTrieNodeKey(path)) + if err != nil { + return false + } + return has +} + +// WriteAccountTrieNode writes the provided account trie node into database. +func WriteAccountTrieNode(db ethdb.KeyValueWriter, path []byte, node []byte) { + if err := db.Put(accountTrieNodeKey(path), node); err != nil { + log.Crit("Failed to store account trie node", "err", err) + } +} + +// DeleteAccountTrieNode deletes the specified account trie node from the database. +func DeleteAccountTrieNode(db ethdb.KeyValueWriter, path []byte) { + if err := db.Delete(accountTrieNodeKey(path)); err != nil { + log.Crit("Failed to delete account trie node", "err", err) + } +} + +// ReadStorageTrieNode retrieves the storage trie node and the associated node +// hash with the specified node path. +func ReadStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte) ([]byte, common.Hash) { + data, err := db.Get(storageTrieNodeKey(accountHash, path)) + if err != nil { + return nil, common.Hash{} + } + h := newHasher() + defer h.release() + return data, h.hash(data) +} + +// HasStorageTrieNode checks the storage trie node presence with the provided +// node path and the associated node hash. +func HasStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte, hash common.Hash) bool { + data, err := db.Get(storageTrieNodeKey(accountHash, path)) + if err != nil { + return false + } + h := newHasher() + defer h.release() + return h.hash(data) == hash +} + +// ExistsStorageTrieNode checks the presence of the storage trie node with the +// specified account hash and node path, regardless of the node hash. +func ExistsStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte) bool { + has, err := db.Has(storageTrieNodeKey(accountHash, path)) + if err != nil { + return false + } + return has +} + +// WriteStorageTrieNode writes the provided storage trie node into database. +func WriteStorageTrieNode(db ethdb.KeyValueWriter, accountHash common.Hash, path []byte, node []byte) { + if err := db.Put(storageTrieNodeKey(accountHash, path), node); err != nil { + log.Crit("Failed to store storage trie node", "err", err) + } +} + +// DeleteStorageTrieNode deletes the specified storage trie node from the database. +func DeleteStorageTrieNode(db ethdb.KeyValueWriter, accountHash common.Hash, path []byte) { + if err := db.Delete(storageTrieNodeKey(accountHash, path)); err != nil { + log.Crit("Failed to delete storage trie node", "err", err) + } +} + +// ReadLegacyTrieNode retrieves the legacy trie node with the given +// associated node hash. +func ReadLegacyTrieNode(db ethdb.KeyValueReader, hash common.Hash) []byte { + data, err := db.Get(hash.Bytes()) + if err != nil { + return nil + } + return data +} + +// HasLegacyTrieNode checks if the trie node with the provided hash is present in db. +func HasLegacyTrieNode(db ethdb.KeyValueReader, hash common.Hash) bool { + ok, _ := db.Has(hash.Bytes()) + return ok +} + +// WriteLegacyTrieNode writes the provided legacy trie node to database. +func WriteLegacyTrieNode(db ethdb.KeyValueWriter, hash common.Hash, node []byte) { + if err := db.Put(hash.Bytes(), node); err != nil { + log.Crit("Failed to store legacy trie node", "err", err) + } +} + +// DeleteLegacyTrieNode deletes the specified legacy trie node from database. +func DeleteLegacyTrieNode(db ethdb.KeyValueWriter, hash common.Hash) { + if err := db.Delete(hash.Bytes()); err != nil { + log.Crit("Failed to delete legacy trie node", "err", err) + } +} + +// HasTrieNode checks the trie node presence with the provided node info and +// the associated node hash. +func HasTrieNode(db ethdb.KeyValueReader, owner common.Hash, path []byte, hash common.Hash, scheme string) bool { + switch scheme { + case HashScheme: + return HasLegacyTrieNode(db, hash) + case PathScheme: + if owner == (common.Hash{}) { + return HasAccountTrieNode(db, path, hash) + } + return HasStorageTrieNode(db, owner, path, hash) + default: + panic(fmt.Sprintf("Unknown scheme %v", scheme)) + } +} + +// ReadTrieNode retrieves the trie node from database with the provided node info +// and associated node hash. +// hashScheme-based lookup requires the following: +// - hash +// +// pathScheme-based lookup requires the following: +// - owner +// - path +func ReadTrieNode(db ethdb.KeyValueReader, owner common.Hash, path []byte, hash common.Hash, scheme string) []byte { + switch scheme { + case HashScheme: + return ReadLegacyTrieNode(db, hash) + case PathScheme: + var ( + blob []byte + nHash common.Hash + ) + if owner == (common.Hash{}) { + blob, nHash = ReadAccountTrieNode(db, path) + } else { + blob, nHash = ReadStorageTrieNode(db, owner, path) + } + if len(blob) == 0 { + return nil + } + if nHash != hash { + return nil // Exists but not match + } + return blob + default: + panic(fmt.Sprintf("Unknown scheme %v", scheme)) + } +} + +// WriteTrieNode writes the trie node into database with the provided node info +// and associated node hash. +// hashScheme-based lookup requires the following: +// - hash +// +// pathScheme-based lookup requires the following: +// - owner +// - path +func WriteTrieNode(db ethdb.KeyValueWriter, owner common.Hash, path []byte, hash common.Hash, node []byte, scheme string) { + switch scheme { + case HashScheme: + WriteLegacyTrieNode(db, hash, node) + case PathScheme: + if owner == (common.Hash{}) { + WriteAccountTrieNode(db, path, node) + } else { + WriteStorageTrieNode(db, owner, path, node) + } + default: + panic(fmt.Sprintf("Unknown scheme %v", scheme)) + } +} + +// DeleteTrieNode deletes the trie node from database with the provided node info +// and associated node hash. +// hashScheme-based lookup requires the following: +// - hash +// +// pathScheme-based lookup requires the following: +// - owner +// - path +func DeleteTrieNode(db ethdb.KeyValueWriter, owner common.Hash, path []byte, hash common.Hash, scheme string) { + switch scheme { + case HashScheme: + DeleteLegacyTrieNode(db, hash) + case PathScheme: + if owner == (common.Hash{}) { + DeleteAccountTrieNode(db, path) + } else { + DeleteStorageTrieNode(db, owner, path) + } + default: + panic(fmt.Sprintf("Unknown scheme %v", scheme)) + } +} + +// ReadStateScheme reads the state scheme of persistent state, or none +// if the state is not present in database. +func ReadStateScheme(db ethdb.Reader) string { + // Check if state in path-based scheme is present + blob, _ := ReadAccountTrieNode(db, nil) + if len(blob) != 0 { + return PathScheme + } + // The root node might be deleted during the initial snap sync, check + // the persistent state id then. + if id := ReadPersistentStateID(db); id != 0 { + return PathScheme + } + // In a hash-based scheme, the genesis state is consistently stored + // on the disk. To assess the scheme of the persistent state, it + // suffices to inspect the scheme of the genesis state. + header := ReadHeader(db, ReadCanonicalHash(db, 0), 0) + if header == nil { + return "" // empty datadir + } + blob = ReadLegacyTrieNode(db, header.Root) + if len(blob) == 0 { + return "" // no state in disk + } + return HashScheme +} diff --git a/core/rawdb/ancient_scheme.go b/core/rawdb/ancient_scheme.go new file mode 100644 index 0000000000..5a173f4915 --- /dev/null +++ b/core/rawdb/ancient_scheme.go @@ -0,0 +1,88 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package rawdb + +import "path/filepath" + +// The list of table names of chain freezer. (headers, hashes, bodies, difficulties) + +const ( + // chainFreezerHeaderTable indicates the name of the freezer header table. + chainFreezerHeaderTable = "headers" + + // chainFreezerHashTable indicates the name of the freezer canonical hash table. + chainFreezerHashTable = "hashes" + + // chainFreezerBodiesTable indicates the name of the freezer block body table. + chainFreezerBodiesTable = "bodies" + + // chainFreezerReceiptTable indicates the name of the freezer receipts table. + chainFreezerReceiptTable = "receipts" + + // chainFreezerDifficultyTable indicates the name of the freezer total difficulty table. + chainFreezerDifficultyTable = "diffs" +) + +const ( + // stateHistoryTableSize defines the maximum size of freezer data files. + stateHistoryTableSize = 2 * 1000 * 1000 * 1000 // 2GB + + // stateHistoryAccountIndex indicates the name of the freezer state history table (Account + Storage). + stateHistoryMeta = "history.meta" + stateHistoryAccountIndex = "account.index" + stateHistoryStorageIndex = "storage.index" + stateHistoryAccountData = "account.data" + stateHistoryStorageData = "storage.data" + + namespace = "eth/db/state" +) + +// stateFreezerNoSnappy configures whether compression is disabled for the stateHistory. +// https://github.com/golang/snappy, Reason for splititng files for looking up in archive mode easily. +var stateFreezerNoSnappy = map[string]bool{ + stateHistoryMeta: true, + stateHistoryAccountIndex: false, + stateHistoryStorageIndex: false, + stateHistoryAccountData: false, + stateHistoryStorageData: false, +} + +// chainFreezerNoSnappy configures whether compression is disabled for the ancient-tables. +// Hashes and difficulties don't compress well. +var chainFreezerNoSnappy = map[string]bool{ + chainFreezerHeaderTable: false, + chainFreezerHashTable: true, + chainFreezerBodiesTable: false, + chainFreezerReceiptTable: false, + chainFreezerDifficultyTable: true, +} + +// The list of identifiers of ancient stores. It can split more in the futures. +var ( + ChainFreezerName = "chain" // the folder name of chain segment ancient store. + StateFreezerName = "state" // the folder name of reverse diff ancient store. +) + +// freezers the collections of all builtin freezers. +var freezers = []string{ChainFreezerName, StateFreezerName} + +// NewStateFreezer initializes the freezer for state history. +func NewStateFreezer(ancientDir string, readOnly bool) (*ResettableFreezer, error) { + return NewResettableFreezer( + filepath.Join(ancientDir, StateFreezerName), namespace, readOnly, + stateHistoryTableSize, stateFreezerNoSnappy) +} diff --git a/core/rawdb/ancient_utils.go b/core/rawdb/ancient_utils.go new file mode 100644 index 0000000000..f0c0f234f1 --- /dev/null +++ b/core/rawdb/ancient_utils.go @@ -0,0 +1,154 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package rawdb + +import ( + "fmt" + "path/filepath" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +type tableSize struct { + name string + size common.StorageSize +} + +// freezerInfo contains the basic information of the freezer. +type freezerInfo struct { + name string // The identifier of freezer + head uint64 // The number of last stored item in the freezer + tail uint64 // The number of first stored item in the freezer + sizes []tableSize // The storage size per table +} + +// count returns the number of stored items in the freezer. +func (info *freezerInfo) count() uint64 { + return info.head - info.tail + 1 +} + +// size returns the storage size of the entire freezer. +func (info *freezerInfo) size() common.StorageSize { + var total common.StorageSize + for _, table := range info.sizes { + total += table.size + } + return total +} + +func inspect(name string, order map[string]bool, reader ethdb.AncientReader) (freezerInfo, error) { + info := freezerInfo{name: name} + for t := range order { + size, err := reader.AncientSize(t) + if err != nil { + return freezerInfo{}, err + } + info.sizes = append(info.sizes, tableSize{name: t, size: common.StorageSize(size)}) + } + // Retrieve the number of last stored item + ancients, err := reader.Ancients() + if err != nil { + return freezerInfo{}, err + } + info.head = ancients - 1 + + // Retrieve the number of first stored item + tail, err := reader.Tail() + if err != nil { + return freezerInfo{}, err + } + info.tail = tail + return info, nil +} + +// inspectFreezers inspects all freezers registered in the system. +func inspectFreezers(db ethdb.Database) ([]freezerInfo, error) { + var infos []freezerInfo + for _, freezer := range freezers { + switch freezer { + case ChainFreezerName: + info, err := inspect(ChainFreezerName, chainFreezerNoSnappy, db) + if err != nil { + return nil, err + } + infos = append(infos, info) + + case StateFreezerName: + if ReadStateScheme(db) != PathScheme { + log.Info("Skip inspecting state freezer", "reason", "state freezer is supported for PathScheme only") + continue + } + datadir, err := db.AncientDatadir() + if err != nil { + return nil, err + } + f, err := NewStateFreezer(datadir, true) + if err != nil { + return nil, err + } + defer f.Close() + + info, err := inspect(StateFreezerName, stateFreezerNoSnappy, f) + if err != nil { + return nil, err + } + infos = append(infos, info) + + default: + return nil, fmt.Errorf("unknown freezer, supported ones: %v", freezers) + } + } + return infos, nil +} + +// InspectFreezerTable dumps out the index of a specific freezer table. The passed +// ancient indicates the path of root ancient directory where the chain freezer can +// be opened. Start and end specifiy the range for dumping out indexes. +// Note this function can only used for debugging purpose. +func InspectFreezerTable(ancient string, freezerName string, tableName string, start, end int64) error { + var ( + path string + tables map[string]bool + ) + + switch freezerName { + case ChainFreezerName: + path, tables = resolveChainFreezerDir(ancient), chainFreezerNoSnappy + case StateFreezerName: + path, tables = filepath.Join(ancient, freezerName), stateFreezerNoSnappy + default: + return fmt.Errorf("unknown freezer, supported ones: %v", freezers) + } + noSnappy, exit := tables[tableName] + if !exit { + // If the tableName is not exit in the tables, return an error. + var names []string + for name := range tables { + names = append(names, name) + } + return fmt.Errorf("unknown table name, supported ones: %v", names) + } + table, err := newFreezerTable(path, tableName, noSnappy) + if err != nil { + return err + } + + table.dumpIndexStdout(start, end) + return nil +} diff --git a/core/rawdb/chain_freezer.go b/core/rawdb/chain_freezer.go new file mode 100644 index 0000000000..d6e3ac5646 --- /dev/null +++ b/core/rawdb/chain_freezer.go @@ -0,0 +1,295 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package rawdb + +import ( + "fmt" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +const ( + // freezerRecheckInterval is the frequency to check the key-value database for + // chain progression that might permit new blocks to be frozen into immutable + // storage. + freezerRecheckInterval = time.Minute + + // freezerBatchLimit is the maximum number of blocks to freeze in one batch + // before doing an fsync and deleting it from the key-value store. + freezerBatchLimit = 30000 +) + +type chainFreezer struct { + *Freezer + quit chan struct{} + wg sync.WaitGroup + trigger chan chan struct{} // Manual blocking frezzer trigger, test determinism +} + +// chainFreezer is a wrapper of freezer with additional chain freezing feature. +// The background thread will keep moving ancient chain segments from key-value +// database to flat files for saving space on live database. +// newChainFreezer initializes the freezer for ancient chain data. +func newChainFreezer(datadir string, namespace string, readonly bool, maxTableSize uint32, tables map[string]bool) (*chainFreezer, error) { + freezer, err := NewFreezer(datadir, namespace, readonly, maxTableSize, tables) + if err != nil { + return nil, err + } + + return &chainFreezer{ + Freezer: freezer, + quit: make(chan struct{}), + trigger: make(chan chan struct{}), + }, nil +} + +// Close func closes the chain freezer instance and terminates the background thread. +func (f *chainFreezer) Close() error { + err := f.Freezer.Close() + select { + case <-f.quit: + default: + close(f.quit) + } + f.wg.Wait() + return err +} + +// freeze is a background thread that periodically checks the blockchain for any +// import progress and moves ancient data from the fast database into the freezer. +// +// This functionality is deliberately broken off from block importing to avoid +// incurring additional data shuffling delays on block propagation. +func (f *chainFreezer) freeze(db ethdb.KeyValueStore) { + nfdb := &nofreezedb{KeyValueStore: db} + + var ( + backoff bool + triggered chan struct{} // Used in tests + ) + for { + select { + case <-f.quit: + log.Info("Freezer shutting down") + return + default: + } + if backoff { + // If we were doing a manual trigger, notify it + if triggered != nil { + triggered <- struct{}{} + triggered = nil + } + select { + case <-time.NewTimer(freezerRecheckInterval).C: + backoff = false + case triggered = <-f.trigger: + backoff = false + case <-f.quit: + return + } + } + // Retrieve the freezing threshold. + hash := ReadHeadBlockHash(nfdb) + if hash == (common.Hash{}) { + log.Debug("Current full block hash unavailable") // new chain, empty database + backoff = true + continue + } + number := ReadHeaderNumber(nfdb, hash) + threshold := f.threshold.Load() + + switch { + case number == nil: + log.Error("Current full block number unavailable", "hash", hash) + backoff = true + continue + + case *number < threshold: + log.Debug("Current full block not old enough", "number", *number, "hash", hash, "delay", threshold) + backoff = true + continue + + case *number-threshold <= f.frozen.Load(): + log.Debug("Ancient blocks frozen already", "number", *number, "hash", hash, "frozen", f.frozen.Load()) + backoff = true + continue + } + head := ReadHeader(nfdb, hash, *number) + if head == nil { + log.Error("Current full block unavailable", "number", *number, "hash", hash) + backoff = true + continue + } + + // Seems we have data ready to be frozen, process in usable batches + var ( + start = time.Now() + first, _ = f.Ancients() + limit = *number - threshold + ) + if limit-first > freezerBatchLimit { + limit = first + freezerBatchLimit + } + ancients, err := f.freezeRange(nfdb, first, limit) + if err != nil { + log.Error("Error in block freeze operation", "err", err) + backoff = true + continue + } + + // Batch of blocks have been frozen, flush them before wiping from leveldb + if err := f.Sync(); err != nil { + log.Crit("Failed to flush frozen tables", "err", err) + } + + // Wipe out all data from the active database + batch := db.NewBatch() + for i := 0; i < len(ancients); i++ { + // Always keep the genesis block in active database + if first+uint64(i) != 0 { + DeleteBlockWithoutNumber(batch, ancients[i], first+uint64(i)) + DeleteCanonicalHash(batch, first+uint64(i)) + } + } + if err := batch.Write(); err != nil { + log.Crit("Failed to delete frozen canonical blocks", "err", err) + } + batch.Reset() + + // Wipe out side chains also and track dangling side chains + var dangling []common.Hash + for number := first; number < f.frozen.Load(); number++ { + // Always keep the genesis block in active database + if number != 0 { + dangling = ReadAllHashes(db, number) + for _, hash := range dangling { + log.Trace("Deleting side chain", "number", number, "hash", hash) + DeleteBlock(batch, hash, number) + } + } + } + if err := batch.Write(); err != nil { + log.Crit("Failed to delete frozen side blocks", "err", err) + } + batch.Reset() + + // Step into the future and delete and dangling side chains + tip := f.frozen.Load() + if tip > 0 { + for len(dangling) > 0 { + drop := make(map[common.Hash]struct{}) + for _, hash := range dangling { + log.Debug("Dangling parent from freezer", "number", tip-1, "hash", hash) + drop[hash] = struct{}{} + } + children := ReadAllHashes(db, tip) + for i := 0; i < len(children); i++ { + // Dig up the child and ensure it's dangling + child := ReadHeader(nfdb, children[i], tip) + if child == nil { + log.Error("Missing dangling header", "number", tip, "hash", children[i]) + continue + } + if _, ok := drop[child.ParentHash]; !ok { + children = append(children[:i], children[i+1:]...) + i-- + continue + } + // Delete all block data associated with the child + log.Debug("Deleting dangling block", "number", tip, "hash", children[i], "parent", child.ParentHash) + DeleteBlock(batch, children[i], tip) + } + dangling = children + tip++ + } + if err := batch.Write(); err != nil { + log.Crit("Failed to delete dangling side blocks", "err", err) + } + } + + // Log something friendly for the user + context := []interface{}{ + "blocks", f.frozen.Load() - first, "elapsed", common.PrettyDuration(time.Since(start)), "number", f.frozen.Load() - 1, + } + if n := len(ancients); n > 0 { + context = append(context, []interface{}{"hash", ancients[n-1]}...) + } + log.Info("Deep froze chain segment", context...) + + // Avoid database thrashing with tiny writes + if f.frozen.Load()-first < freezerBatchLimit { + backoff = true + } + } +} + +func (f *chainFreezer) freezeRange(nfdb *nofreezedb, number, limit uint64) (hashes []common.Hash, err error) { + hashes = make([]common.Hash, 0, limit-number) + + _, err = f.ModifyAncients(func(op ethdb.AncientWriteOp) error { + for ; number <= limit; number++ { + // Retrieve all the components of the canonical block. + hash := ReadCanonicalHash(nfdb, number) + if hash == (common.Hash{}) { + return fmt.Errorf("canonical hash missing, can't freeze block %d", number) + } + header := ReadHeaderRLP(nfdb, hash, number) + if len(header) == 0 { + return fmt.Errorf("block header missing, can't freeze block %d", number) + } + body := ReadBodyRLP(nfdb, hash, number) + if len(body) == 0 { + return fmt.Errorf("block body missing, can't freeze block %d", number) + } + receipts := ReadReceiptsRLP(nfdb, hash, number) + if len(receipts) == 0 { + return fmt.Errorf("block receipts missing, can't freeze block %d", number) + } + td := ReadTdRLP(nfdb, hash, number) + if len(td) == 0 { + return fmt.Errorf("total difficulty missing, can't freeze block %d", number) + } + + // Write to the batch. + if err := op.AppendRaw(chainFreezerHashTable, number, hash[:]); err != nil { + return fmt.Errorf("can't write hash to freezer: %v", err) + } + if err := op.AppendRaw(chainFreezerHeaderTable, number, header); err != nil { + return fmt.Errorf("can't write header to freezer: %v", err) + } + if err := op.AppendRaw(chainFreezerBodiesTable, number, body); err != nil { + return fmt.Errorf("can't write body to freezer: %v", err) + } + if err := op.AppendRaw(chainFreezerReceiptTable, number, receipts); err != nil { + return fmt.Errorf("can't write receipts to freezer: %v", err) + } + if err := op.AppendRaw(chainFreezerDifficultyTable, number, td); err != nil { + return fmt.Errorf("can't write td to freezer: %v", err) + } + + hashes = append(hashes, hash) + } + return nil + }) + + return hashes, err +} diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index 74ac5787b7..2cb57a04e4 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -50,7 +50,7 @@ func InitDatabaseFromFreezer(db ethdb.Database) { if i+count > frozen { count = frozen - i } - data, err := db.AncientRange(freezerHashTable, i, count, 32*count) + data, err := db.AncientRange(chainFreezerHashTable, i, count, 32*count) if err != nil { log.Crit("Failed to init database from freezer", "err", err) } diff --git a/core/rawdb/chain_iterator_test.go b/core/rawdb/chain_iterator_test.go index 45cc6323e0..b6180f8252 100644 --- a/core/rawdb/chain_iterator_test.go +++ b/core/rawdb/chain_iterator_test.go @@ -34,7 +34,7 @@ func TestChainIterator(t *testing.T) { var block *types.Block var txs []*types.Transaction to := common.BytesToAddress([]byte{0x11}) - block = types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, nil, newHasher()) // Empty genesis block + block = types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, nil, newTestHasher()) // Empty genesis block WriteBlock(chainDb, block) WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64()) for i := uint64(1); i <= 10; i++ { @@ -60,7 +60,7 @@ func TestChainIterator(t *testing.T) { }) } txs = append(txs, tx) - block = types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, []*types.Transaction{tx}, nil, nil, newHasher()) + block = types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, []*types.Transaction{tx}, nil, nil, newTestHasher()) WriteBlock(chainDb, block) WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64()) } @@ -111,7 +111,7 @@ func TestIndexTransactions(t *testing.T) { to := common.BytesToAddress([]byte{0x11}) // Write empty genesis block - block = types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, nil, newHasher()) + block = types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, nil, newTestHasher()) WriteBlock(chainDb, block) WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64()) @@ -138,7 +138,7 @@ func TestIndexTransactions(t *testing.T) { }) } txs = append(txs, tx) - block = types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, []*types.Transaction{tx}, nil, nil, newHasher()) + block = types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, []*types.Transaction{tx}, nil, nil, newTestHasher()) WriteBlock(chainDb, block) WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64()) } diff --git a/core/rawdb/database.go b/core/rawdb/database.go index ab2b3d99ee..6bf5366ea2 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -21,7 +21,9 @@ import ( "errors" "fmt" "os" + "path" "path/filepath" + "strings" "time" "github.com/ethereum/go-ethereum/common" @@ -37,6 +39,12 @@ import ( type freezerdb struct { ethdb.KeyValueStore ethdb.AncientStore + ancientRoot string +} + +// AncientDatadir returns the path of root ancient directory. +func (frdb *freezerdb) AncientDatadir() (string, error) { + return frdb.ancientRoot, nil } // Close implements io.Closer, closing both the fast key-value store as well as @@ -59,18 +67,18 @@ func (frdb *freezerdb) Close() error { // a freeze cycle completes, without having to sleep for a minute to trigger the // automatic background run. func (frdb *freezerdb) Freeze(threshold uint64) error { - if frdb.AncientStore.(*freezer).readonly { + if frdb.AncientStore.(*chainFreezer).readonly { return errReadOnly } // Set the freezer threshold to a temporary value defer func(old uint64) { - frdb.AncientStore.(*freezer).threshold.Store(old) - }(frdb.AncientStore.(*freezer).threshold.Load()) - frdb.AncientStore.(*freezer).threshold.Store(threshold) + frdb.AncientStore.(*chainFreezer).threshold.Store(old) + }(frdb.AncientStore.(*chainFreezer).threshold.Load()) + frdb.AncientStore.(*chainFreezer).threshold.Store(threshold) // Trigger a freeze cycle and block until it's done trigger := make(chan struct{}, 1) - frdb.AncientStore.(*freezer).trigger <- trigger + frdb.AncientStore.(*chainFreezer).trigger <- trigger <-trigger return nil } @@ -80,6 +88,11 @@ type nofreezedb struct { ethdb.KeyValueStore } +// Tail returns an error as we don't have a backing chain freezer. +func (db *nofreezedb) Tail() (uint64, error) { + return 0, errNotSupported +} + // HasAncient returns an error as we don't have a backing chain freezer. func (db *nofreezedb) HasAncient(kind string, number uint64) (bool, error) { return false, errNotSupported @@ -110,17 +123,22 @@ func (db *nofreezedb) ModifyAncients(func(ethdb.AncientWriteOp) error) (int64, e return 0, errNotSupported } -// TruncateAncients returns an error as we don't have a backing chain freezer. -func (db *nofreezedb) TruncateAncients(items uint64) error { - return errNotSupported -} - // Sync returns an error as we don't have a backing chain freezer. func (db *nofreezedb) Sync() error { return errNotSupported } -func (db *nofreezedb) ReadAncients(fn func(reader ethdb.AncientReader) error) (err error) { +// TruncateHead returns an error as we don't have a backing chain freezer. +func (db *nofreezedb) TruncateHead(items uint64) (uint64, error) { + return 0, errNotSupported +} + +// TruncateTail returns an error as we don't have a backing chain freezer. +func (db *nofreezedb) TruncateTail(items uint64) (uint64, error) { + return 0, errNotSupported +} + +func (db *nofreezedb) ReadAncients(fn func(reader ethdb.AncientReaderOp) error) (err error) { // Unlike other ancient-related methods, this method does not return // errNotSupported when invoked. // The reason for this is that the caller might want to do several things: @@ -136,18 +154,47 @@ func (db *nofreezedb) ReadAncients(fn func(reader ethdb.AncientReader) error) (e return fn(db) } +// AncientDatadir returns an error as we don't have a backing chain freezer. +func (db *nofreezedb) AncientDatadir() (string, error) { + return "", errNotSupported +} + // NewDatabase creates a high level database on top of a given key-value data // store without a freezer moving immutable chain segments into cold storage. func NewDatabase(db ethdb.KeyValueStore) ethdb.Database { return &nofreezedb{KeyValueStore: db} } +// resolveChainFreezerDir is a helper function which resolves the absolute path +// of chain freezer by considering backward compatibility. +func resolveChainFreezerDir(ancient string) string { + // Check if the chain freezer is already present in the specified + // sub folder, if not then two possiblities + // - chain freezer is not initialized + // - it's legacy location, chain freezer is present in the root ancient folder + + freezer := path.Join(ancient, ChainFreezerName) + if !common.FileExist(freezer) { + if !common.FileExist(ancient) { + // The entire ancient store is not initialized, still use the sub + // folder for initialization. + } else { + // Ancient root is already initialized, then we hold the assumption + // that chain freezer is also initialized and located in root folder. + // In this case fallback to legacy location. + freezer = ancient + log.Info("Found legacy ancient chain path", "location", ancient) + } + } + return freezer +} + // NewDatabaseWithFreezer creates a high level database on top of a given key- // value data store with a freezer moving immutable chain segments into cold // storage. -func NewDatabaseWithFreezer(db ethdb.KeyValueStore, freezer string, namespace string, readonly bool) (ethdb.Database, error) { +func NewDatabaseWithFreezer(db ethdb.KeyValueStore, ancient string, namespace string, readonly bool) (ethdb.Database, error) { // Create the idle freezer instance - frdb, err := newFreezer(freezer, namespace, readonly, freezerTableSize, FreezerNoSnappy) + frdb, err := newChainFreezer(resolveChainFreezerDir(ancient), namespace, readonly, freezerTableSize, chainFreezerNoSnappy) if err != nil { return nil, err } @@ -178,7 +225,7 @@ func NewDatabaseWithFreezer(db ethdb.KeyValueStore, freezer string, namespace st // If the freezer already contains something, ensure that the genesis blocks // match, otherwise we might mix up freezers across chains and destroy both // the freezer and the key-value store. - frgenesis, err := frdb.Ancient(freezerHashTable, 0) + frgenesis, err := frdb.Ancient(chainFreezerHashTable, 0) if err != nil { return nil, fmt.Errorf("failed to retrieve genesis from ancient %v", err) } else if !bytes.Equal(kvgenesis, frgenesis) { @@ -224,6 +271,7 @@ func NewDatabaseWithFreezer(db ethdb.KeyValueStore, freezer string, namespace st }() } return &freezerdb{ + ancientRoot: ancient, // o.AncientsDirectory KeyValueStore: db, AncientStore: frdb, }, nil @@ -398,7 +446,10 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { tds stat numHashPairings stat hashNumPairings stat - tries stat + legacyTries stat + stateLookups stat + accountTries stat + storageTries stat codes stat txLookups stat accountSnaps stat @@ -408,13 +459,6 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { cliqueSnaps stat consortiumSnaps stat - // Ancient store statistics - ancientHeadersSize common.StorageSize - ancientBodiesSize common.StorageSize - ancientReceiptsSize common.StorageSize - ancientTdsSize common.StorageSize - ancientHashesSize common.StorageSize - // Les statistic chtTrieNodes stat bloomTrieNodes stat @@ -446,8 +490,14 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { numHashPairings.Add(size) case bytes.HasPrefix(key, headerNumberPrefix) && len(key) == (len(headerNumberPrefix)+common.HashLength): hashNumPairings.Add(size) - case len(key) == common.HashLength: - tries.Add(size) + case IsLegacyTrieNode(key, it.Value()): + legacyTries.Add(size) + case bytes.HasPrefix(key, stateIDPrefix) && len(key) == len(stateIDPrefix)+common.HashLength: + stateLookups.Add(size) + case IsAccountTrieNode(key): + accountTries.Add(size) + case IsStorageTrieNode(key): + storageTries.Add(size) case bytes.HasPrefix(key, CodePrefix) && len(key) == len(CodePrefix)+common.HashLength: codes.Add(size) case bytes.HasPrefix(key, txLookupPrefix) && len(key) == (len(txLookupPrefix)+common.HashLength): @@ -460,6 +510,8 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { preimages.Add(size) case bytes.HasPrefix(key, configPrefix) && len(key) == (len(configPrefix)+common.HashLength): metadata.Add(size) + case bytes.HasPrefix(key, genesisPrefix) && len(key) == (len(genesisPrefix)+common.HashLength): + metadata.Add(size) case bytes.HasPrefix(key, bloomBitsPrefix) && len(key) == (len(bloomBitsPrefix)+10+common.HashLength): bloomBits.Add(size) case bytes.HasPrefix(key, BloomBitsIndexPrefix): @@ -483,7 +535,7 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { fastTrieProgressKey, snapshotDisabledKey, SnapshotRootKey, snapshotJournalKey, snapshotGeneratorKey, snapshotRecoveryKey, txIndexTailKey, fastTxLookupLimitKey, uncleanShutdownKey, badBlockKey, highestFinalityVoteKey, storeInternalTxsEnabledKey, - snapshotSyncStatusKey, + snapshotSyncStatusKey, persistentStateIDKey, trieJournalKey, snapshotSyncStatusKey, snapSyncStatusFlagKey, } { if bytes.Equal(key, meta) { metadata.Add(size) @@ -501,20 +553,8 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { logged = time.Now() } } - // Inspect append-only file store then. - ancientSizes := []*common.StorageSize{&ancientHeadersSize, &ancientBodiesSize, &ancientReceiptsSize, &ancientHashesSize, &ancientTdsSize} - for i, category := range []string{freezerHeaderTable, freezerBodiesTable, freezerReceiptTable, freezerHashTable, freezerDifficultyTable} { - if size, err := db.AncientSize(category); err == nil { - *ancientSizes[i] += common.StorageSize(size) - total += common.StorageSize(size) - } - } - // Get number of ancient rows inside the freezer - ancients := counter(0) - if count, err := db.Ancients(); err == nil { - ancients = counter(count) - } - // Display the database statistic. + + // Display the database statistic of key-value store. stats := [][]string{ {"Key-Value store", "Headers", headers.Size(), headers.Count()}, {"Key-Value store", "Bodies", bodies.Size(), bodies.Count()}, @@ -525,21 +565,35 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { {"Key-Value store", "Transaction index", txLookups.Size(), txLookups.Count()}, {"Key-Value store", "Bloombit index", bloomBits.Size(), bloomBits.Count()}, {"Key-Value store", "Contract codes", codes.Size(), codes.Count()}, - {"Key-Value store", "Trie nodes", tries.Size(), tries.Count()}, + {"Key-Value store", "Hash trie nodes", legacyTries.Size(), legacyTries.Count()}, + {"Key-Value store", "Path trie state lookups", stateLookups.Size(), stateLookups.Count()}, + {"Key-Value store", "Path trie account nodes", accountTries.Size(), accountTries.Count()}, + {"Key-Value store", "Path trie storage nodes", storageTries.Size(), storageTries.Count()}, {"Key-Value store", "Trie preimages", preimages.Size(), preimages.Count()}, {"Key-Value store", "Account snapshot", accountSnaps.Size(), accountSnaps.Count()}, {"Key-Value store", "Storage snapshot", storageSnaps.Size(), storageSnaps.Count()}, {"Key-Value store", "Clique snapshots", cliqueSnaps.Size(), cliqueSnaps.Count()}, {"Key-Value store", "Consortium snapshots", consortiumSnaps.Size(), consortiumSnaps.Count()}, {"Key-Value store", "Singleton metadata", metadata.Size(), metadata.Count()}, - {"Ancient store", "Headers", ancientHeadersSize.String(), ancients.String()}, - {"Ancient store", "Bodies", ancientBodiesSize.String(), ancients.String()}, - {"Ancient store", "Receipt lists", ancientReceiptsSize.String(), ancients.String()}, - {"Ancient store", "Difficulties", ancientTdsSize.String(), ancients.String()}, - {"Ancient store", "Block number->hash", ancientHashesSize.String(), ancients.String()}, {"Light client", "CHT trie nodes", chtTrieNodes.Size(), chtTrieNodes.Count()}, {"Light client", "Bloom trie nodes", bloomTrieNodes.Size(), bloomTrieNodes.Count()}, } + // Inspect all registered append-only file store then. + ancients, err := inspectFreezers(db) + if err != nil { + return err + } + for _, ancient := range ancients { + for _, table := range ancient.sizes { + stats = append(stats, []string{ + fmt.Sprintf("Ancient store (%s)", strings.Title(ancient.name)), + strings.Title(table.name), + table.size.String(), + fmt.Sprintf("%d", ancient.count()), + }) + } + total += ancient.size() + } table := tablewriter.NewWriter(os.Stdout) table.SetHeader([]string{"Database", "Category", "Size", "Items"}) table.SetFooter([]string{"", "Total", total.String(), " "}) diff --git a/core/rawdb/freezer.go b/core/rawdb/freezer.go index a089a01c67..df768eb693 100644 --- a/core/rawdb/freezer.go +++ b/core/rawdb/freezer.go @@ -24,9 +24,7 @@ import ( "path/filepath" "sync" "sync/atomic" - "time" - "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" @@ -53,28 +51,21 @@ var ( ) const ( - // freezerRecheckInterval is the frequency to check the key-value database for - // chain progression that might permit new blocks to be frozen into immutable - // storage. - freezerRecheckInterval = time.Minute - // freezerBatchLimit is the maximum number of blocks to freeze in one batch - // before doing an fsync and deleting it from the key-value store. - freezerBatchLimit = 30000 - - // freezerTableSize defines the maximum size of freezer data files. + // freezerTableSize defines the maximum size of freezer data files, max size of per file is 2GB. freezerTableSize = 2 * 1000 * 1000 * 1000 ) -// freezer is an memory mapped append-only database to store immutable chain data +// freezer is a memory mapped append-only database to store immutable chain data // into flat files: // -// - The append only nature ensures that disk writes are minimized. -// - The memory mapping ensures we can max out system memory for caching without -// reserving it for go-ethereum. This would also reduce the memory requirements -// of Geth, and thus also GC overhead. -type freezer struct { +// - The append only nature ensures that disk writes are minimized. +// - The memory mapping ensures we can max out system memory for caching without +// reserving it for go-ethereum. This would also reduce the memory requirements +// of Geth, and thus also GC overhead. +type Freezer struct { frozen atomic.Uint64 // Number of items already frozen + tail atomic.Uint64 // Number of the first stored item in the freezer threshold atomic.Uint64 // Number of recent blocks not to freeze (params.FullImmutabilityThreshold apart from tests) // This lock synchronizes writers and the truncate operation, as well as @@ -98,7 +89,7 @@ type freezer struct { // // The 'tables' argument defines the data tables. If the value of a map // entry is true, snappy compression is disabled for the table. -func newFreezer(datadir string, namespace string, readonly bool, maxTableSize uint32, tables map[string]bool) (*freezer, error) { +func NewFreezer(datadir string, namespace string, readonly bool, maxTableSize uint32, tables map[string]bool) (*Freezer, error) { // Create the initial freezer object var ( readMeter = metrics.NewRegisteredMeter(namespace+"ancient/read", nil) @@ -119,13 +110,15 @@ func newFreezer(datadir string, namespace string, readonly bool, maxTableSize ui return nil, err } // Open all the supported data tables - freezer := &freezer{ + freezer := &Freezer{ readonly: readonly, tables: make(map[string]*freezerTable), instanceLock: lock, trigger: make(chan chan struct{}), quit: make(chan struct{}), } + // The number of blocks after which a chain segment is + // considered immutable (i.e. soft finality) freezer.threshold.Store(params.FullImmutabilityThreshold) // Create the tables. @@ -141,7 +134,7 @@ func newFreezer(datadir string, namespace string, readonly bool, maxTableSize ui freezer.tables[name] = table } - // Truncate all tables to common length. + // Truncate all tables to common length, then close if err := freezer.repair(); err != nil { for _, table := range freezer.tables { table.Close() @@ -158,7 +151,7 @@ func newFreezer(datadir string, namespace string, readonly bool, maxTableSize ui } // Close terminates the chain freezer, unmapping all the data files. -func (f *freezer) Close() error { +func (f *Freezer) Close() error { f.writeLock.Lock() defer f.writeLock.Unlock() @@ -184,7 +177,7 @@ func (f *freezer) Close() error { // HasAncient returns an indicator whether the specified ancient data exists // in the freezer. -func (f *freezer) HasAncient(kind string, number uint64) (bool, error) { +func (f *Freezer) HasAncient(kind string, number uint64) (bool, error) { if table := f.tables[kind]; table != nil { return table.has(number), nil } @@ -192,7 +185,7 @@ func (f *freezer) HasAncient(kind string, number uint64) (bool, error) { } // Ancient retrieves an ancient binary blob from the append-only immutable files. -func (f *freezer) Ancient(kind string, number uint64) ([]byte, error) { +func (f *Freezer) Ancient(kind string, number uint64) ([]byte, error) { if table := f.tables[kind]; table != nil { return table.Retrieve(number) } @@ -201,10 +194,12 @@ func (f *freezer) Ancient(kind string, number uint64) ([]byte, error) { // AncientRange retrieves multiple items in sequence, starting from the index 'start'. // It will return -// - at most 'max' items, -// - at least 1 item (even if exceeding the maxByteSize), but will otherwise -// return as many items as fit into maxByteSize. -func (f *freezer) AncientRange(kind string, start, count, maxBytes uint64) ([][]byte, error) { +// - at most 'count' items, +// - if maxBytes is specified: at least 1 item (even if exceeding the maxByteSize), +// but will otherwise return as many items as fit into maxByteSize. +// - if maxBytes is not specified, 'count' items will be returned if they are present.Retru +// - if maxBytes is not specified, 'count' items will be returned if they are present.Retru +func (f *Freezer) AncientRange(kind string, start, count, maxBytes uint64) ([][]byte, error) { if table := f.tables[kind]; table != nil { return table.RetrieveItems(start, count, maxBytes) } @@ -212,12 +207,12 @@ func (f *freezer) AncientRange(kind string, start, count, maxBytes uint64) ([][] } // Ancients returns the length of the frozen items. -func (f *freezer) Ancients() (uint64, error) { +func (f *Freezer) Ancients() (uint64, error) { return f.frozen.Load(), nil } // AncientSize returns the ancient size of the specified category. -func (f *freezer) AncientSize(kind string) (uint64, error) { +func (f *Freezer) AncientSize(kind string) (uint64, error) { // This needs the write lock to avoid data races on table fields. // Speed doesn't matter here, AncientSize is for debugging. f.writeLock.RLock() @@ -229,16 +224,21 @@ func (f *freezer) AncientSize(kind string) (uint64, error) { return 0, errUnknownTable } +// Tail returns the number of first stored item in the freezer. +func (f *Freezer) Tail() (uint64, error) { + return f.tail.Load(), nil +} + // ReadAncients runs the given read operation while ensuring that no writes take place // on the underlying freezer. -func (f *freezer) ReadAncients(fn func(ethdb.AncientReader) error) (err error) { +func (f *Freezer) ReadAncients(fn func(ethdb.AncientReaderOp) error) (err error) { f.writeLock.RLock() defer f.writeLock.RUnlock() return fn(f) } // ModifyAncients runs the given write operation. -func (f *freezer) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (writeSize int64, err error) { +func (f *Freezer) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (writeSize int64, err error) { if f.readonly { return 0, errReadOnly } @@ -251,7 +251,7 @@ func (f *freezer) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (writeSize if err != nil { // The write operation has failed. Go back to the previous item position. for name, table := range f.tables { - err := table.truncate(prevItem) + err := table.truncateHead(prevItem) if err != nil { log.Error("Freezer table roll-back failed", "table", name, "index", prevItem, "err", err) } @@ -271,28 +271,55 @@ func (f *freezer) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (writeSize return writeSize, nil } -// TruncateAncients discards any recent data above the provided threshold number. -func (f *freezer) TruncateAncients(items uint64) error { +// TruncateHead discards any recent data above the provided threshold number, only keep the first items ancient data. +// Return the old head number. +func (f *Freezer) TruncateHead(items uint64) (uint64, error) { if f.readonly { - return errReadOnly + return 0, errReadOnly } f.writeLock.Lock() defer f.writeLock.Unlock() - if f.frozen.Load() <= items { - return nil + // If the current frozen number is less than the requested items for frozen, do nothing. + previousItems := f.frozen.Load() + if previousItems <= items { + return previousItems, nil } for _, table := range f.tables { - if err := table.truncate(items); err != nil { - return err + if err := table.truncateHead(items); err != nil { + return 0, err } } f.frozen.Store(items) - return nil + return previousItems, nil +} + +// TruncateTail discards any recent data below the provided threshold number, only keep the last items ancient data, return the old tail number. +func (f *Freezer) TruncateTail(tail uint64) (uint64, error) { + if f.readonly { + return 0, errReadOnly + } + f.writeLock.Lock() + defer f.writeLock.Unlock() + + // If the current tail number is greater than the requested tail, seem out of range for truncating, do nothing. + old := f.tail.Load() + + if old >= tail { + return old, nil + } + + for _, table := range f.tables { + if err := table.truncateTail(tail); err != nil { + return 0, err + } + } + f.tail.Store(tail) + return old, nil } // Sync flushes all data tables to disk. -func (f *freezer) Sync() error { +func (f *Freezer) Sync() error { var errs []error for _, table := range f.tables { if err := table.Sync(); err != nil { @@ -306,240 +333,36 @@ func (f *freezer) Sync() error { } // repair truncates all data tables to the same length. -func (f *freezer) repair() error { - min := uint64(math.MaxUint64) - for _, table := range f.tables { - items := table.items.Load() - if min > items { - min = items - } - } - for _, table := range f.tables { - if err := table.truncate(min); err != nil { - return err - } - } - f.frozen.Store(min) - return nil -} - -// freeze is a background thread that periodically checks the blockchain for any -// import progress and moves ancient data from the fast database into the freezer. -// -// This functionality is deliberately broken off from block importing to avoid -// incurring additional data shuffling delays on block propagation. -func (f *freezer) freeze(db ethdb.KeyValueStore) { - nfdb := &nofreezedb{KeyValueStore: db} - +func (f *Freezer) repair() error { var ( - backoff bool - triggered chan struct{} // Used in tests + head = uint64(math.MaxUint64) + tail = uint64(0) ) - for { - select { - case <-f.quit: - log.Info("Freezer shutting down") - return - default: - } - if backoff { - // If we were doing a manual trigger, notify it - if triggered != nil { - triggered <- struct{}{} - triggered = nil - } - select { - case <-time.NewTimer(freezerRecheckInterval).C: - backoff = false - case triggered = <-f.trigger: - backoff = false - case <-f.quit: - return - } - } - // Retrieve the freezing threshold. - hash := ReadHeadBlockHash(nfdb) - if hash == (common.Hash{}) { - log.Debug("Current full block hash unavailable") // new chain, empty database - backoff = true - continue - } - number := ReadHeaderNumber(nfdb, hash) - threshold := f.threshold.Load() - - switch { - case number == nil: - log.Error("Current full block number unavailable", "hash", hash) - backoff = true - continue - - case *number < threshold: - log.Debug("Current full block not old enough", "number", *number, "hash", hash, "delay", threshold) - backoff = true - continue - - case *number-threshold <= f.frozen.Load(): - log.Debug("Ancient blocks frozen already", "number", *number, "hash", hash, "frozen", f.frozen.Load()) - backoff = true - continue - } - head := ReadHeader(nfdb, hash, *number) - if head == nil { - log.Error("Current full block unavailable", "number", *number, "hash", hash) - backoff = true - continue - } - - // Seems we have data ready to be frozen, process in usable batches - var ( - start = time.Now() - first, _ = f.Ancients() - limit = *number - threshold - ) - if limit-first > freezerBatchLimit { - limit = first + freezerBatchLimit - } - ancients, err := f.freezeRange(nfdb, first, limit) - if err != nil { - log.Error("Error in block freeze operation", "err", err) - backoff = true - continue - } - - // Batch of blocks have been frozen, flush them before wiping from leveldb - if err := f.Sync(); err != nil { - log.Crit("Failed to flush frozen tables", "err", err) - } + // Looping through all tables to find the most common head and tail between tables + for _, table := range f.tables { + items := table.items.Load() - // Wipe out all data from the active database - batch := db.NewBatch() - for i := 0; i < len(ancients); i++ { - // Always keep the genesis block in active database - if first+uint64(i) != 0 { - DeleteBlockWithoutNumber(batch, ancients[i], first+uint64(i)) - DeleteCanonicalHash(batch, first+uint64(i)) - } - } - if err := batch.Write(); err != nil { - log.Crit("Failed to delete frozen canonical blocks", "err", err) + if head > items { + head = items } - batch.Reset() - - // Wipe out side chains also and track dangling side chains - var dangling []common.Hash - for number := first; number < f.frozen.Load(); number++ { - // Always keep the genesis block in active database - if number != 0 { - dangling = ReadAllHashes(db, number) - for _, hash := range dangling { - log.Trace("Deleting side chain", "number", number, "hash", hash) - DeleteBlock(batch, hash, number) - } - } - } - if err := batch.Write(); err != nil { - log.Crit("Failed to delete frozen side blocks", "err", err) - } - batch.Reset() - - // Step into the future and delete and dangling side chains - tip := f.frozen.Load() - if tip > 0 { - for len(dangling) > 0 { - drop := make(map[common.Hash]struct{}) - for _, hash := range dangling { - log.Debug("Dangling parent from freezer", "number", tip-1, "hash", hash) - drop[hash] = struct{}{} - } - children := ReadAllHashes(db, tip) - for i := 0; i < len(children); i++ { - // Dig up the child and ensure it's dangling - child := ReadHeader(nfdb, children[i], tip) - if child == nil { - log.Error("Missing dangling header", "number", tip, "hash", children[i]) - continue - } - if _, ok := drop[child.ParentHash]; !ok { - children = append(children[:i], children[i+1:]...) - i-- - continue - } - // Delete all block data associated with the child - log.Debug("Deleting dangling block", "number", tip, "hash", children[i], "parent", child.ParentHash) - DeleteBlock(batch, children[i], tip) - } - dangling = children - tip++ - } - if err := batch.Write(); err != nil { - log.Crit("Failed to delete dangling side blocks", "err", err) - } + hidden := table.itemHidden.Load() + if hidden > tail { + tail = hidden } + } - // Log something friendly for the user - context := []interface{}{ - "blocks", f.frozen.Load() - first, "elapsed", common.PrettyDuration(time.Since(start)), "number", f.frozen.Load() - 1, - } - if n := len(ancients); n > 0 { - context = append(context, []interface{}{"hash", ancients[n-1]}...) + // Truncate all tables to the common head and tail. Returns the previous head number. + for _, table := range f.tables { + if err := table.truncateHead(head); err != nil { + return err } - log.Info("Deep froze chain segment", context...) - // Avoid database thrashing with tiny writes - if f.frozen.Load()-first < freezerBatchLimit { - backoff = true + if err := table.truncateTail(tail); err != nil { + return err } } -} - -func (f *freezer) freezeRange(nfdb *nofreezedb, number, limit uint64) (hashes []common.Hash, err error) { - hashes = make([]common.Hash, 0, limit-number) - - _, err = f.ModifyAncients(func(op ethdb.AncientWriteOp) error { - for ; number <= limit; number++ { - // Retrieve all the components of the canonical block. - hash := ReadCanonicalHash(nfdb, number) - if hash == (common.Hash{}) { - return fmt.Errorf("canonical hash missing, can't freeze block %d", number) - } - header := ReadHeaderRLP(nfdb, hash, number) - if len(header) == 0 { - return fmt.Errorf("block header missing, can't freeze block %d", number) - } - body := ReadBodyRLP(nfdb, hash, number) - if len(body) == 0 { - return fmt.Errorf("block body missing, can't freeze block %d", number) - } - receipts := ReadReceiptsRLP(nfdb, hash, number) - if len(receipts) == 0 { - return fmt.Errorf("block receipts missing, can't freeze block %d", number) - } - td := ReadTdRLP(nfdb, hash, number) - if len(td) == 0 { - return fmt.Errorf("total difficulty missing, can't freeze block %d", number) - } - - // Write to the batch. - if err := op.AppendRaw(freezerHashTable, number, hash[:]); err != nil { - return fmt.Errorf("can't write hash to freezer: %v", err) - } - if err := op.AppendRaw(freezerHeaderTable, number, header); err != nil { - return fmt.Errorf("can't write header to freezer: %v", err) - } - if err := op.AppendRaw(freezerBodiesTable, number, body); err != nil { - return fmt.Errorf("can't write body to freezer: %v", err) - } - if err := op.AppendRaw(freezerReceiptTable, number, receipts); err != nil { - return fmt.Errorf("can't write receipts to freezer: %v", err) - } - if err := op.AppendRaw(freezerDifficultyTable, number, td); err != nil { - return fmt.Errorf("can't write td to freezer: %v", err) - } - - hashes = append(hashes, hash) - } - return nil - }) - - return hashes, err + // Update frozen and tail counters. + f.frozen.Store(head) + f.tail.Store(tail) + return nil } diff --git a/core/rawdb/freezer_batch.go b/core/rawdb/freezer_batch.go index 476079c5a1..aa4a439e1d 100644 --- a/core/rawdb/freezer_batch.go +++ b/core/rawdb/freezer_batch.go @@ -33,7 +33,7 @@ type freezerBatch struct { tables map[string]*freezerTableBatch } -func newFreezerBatch(f *freezer) *freezerBatch { +func newFreezerBatch(f *Freezer) *freezerBatch { batch := &freezerBatch{tables: make(map[string]*freezerTableBatch, len(f.tables))} for kind, table := range f.tables { batch.tables[kind] = table.newBatch() @@ -165,6 +165,7 @@ func (batch *freezerTableBatch) appendItem(data []byte) error { batch.totalBytes += itemSize // Put index entry to buffer. + // The index file contains a list of index entries. entry := indexEntry{filenum: batch.t.headId, offset: uint32(itemOffset + itemSize)} batch.indexBuffer = entry.append(batch.indexBuffer) batch.curItem++ @@ -182,19 +183,27 @@ func (batch *freezerTableBatch) maybeCommit() error { // commit writes the batched items to the backing freezerTable. func (batch *freezerTableBatch) commit() error { - // Write data. + // Write data. The head file is fsync'd after write to ensure the + // data is truly transferred to disk. _, err := batch.t.head.Write(batch.dataBuffer) if err != nil { return err } + if err := batch.t.head.Sync(); err != nil { + return err + } dataSize := int64(len(batch.dataBuffer)) batch.dataBuffer = batch.dataBuffer[:0] - // Write index. + // Write indices. The index file is fsync'd after write to ensure the + // data indexes are truly transferred to disk. _, err = batch.t.index.Write(batch.indexBuffer) if err != nil { return err } + if err := batch.t.index.Sync(); err != nil { + return err + } indexSize := int64(len(batch.indexBuffer)) batch.indexBuffer = batch.indexBuffer[:0] diff --git a/core/rawdb/freezer_meta.go b/core/rawdb/freezer_meta.go new file mode 100644 index 0000000000..3eed366a7b --- /dev/null +++ b/core/rawdb/freezer_meta.go @@ -0,0 +1,112 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package rawdb + +import ( + "io" + "os" + + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" +) + +const freezerVersion = 1 // The initial version tag of freezer table metadata + +// freezerTableMeta wraps all the metadata of the freezer table. +type freezerTableMeta struct { + // Version is the versioning descriptor of the freezer table. + Version uint16 + + // VirtualTail indicates how many items have been marked as deleted. + // Its value is equal to the number of items removed from the table + // plus the number of items hidden in the table, so it should never + // be lower than the "actual tail". + VirtualTail uint64 +} + +// newMetadata initializes the metadata object with the given virtual tail. +func newMetadata(tail uint64) *freezerTableMeta { + return &freezerTableMeta{ + Version: freezerVersion, + VirtualTail: tail, + } +} + +// readMetadata reads the metadata of the freezer table from the +// given metadata file. +func readMetadata(file *os.File) (*freezerTableMeta, error) { + _, err := file.Seek(0, io.SeekStart) // SeekStart means the origin of the file + if err != nil { + return nil, err + } + var meta freezerTableMeta + if err := rlp.Decode(file, &meta); err != nil { + return nil, err + } + return &meta, nil +} + +// writeMetadata writes the metadata of the freezer table into the +// given metadata file. +func writeMetadata(file *os.File, meta *freezerTableMeta) error { + _, err := file.Seek(0, io.SeekStart) + if err != nil { + return err + } + return rlp.Encode(file, meta) +} + +// loadMetadata loads the metadata from the given metadata file. +// Initializes the metadata file with the given "actual tail" if +// it's empty. +func loadMetadata(file *os.File, tail uint64) (*freezerTableMeta, error) { + stat, err := file.Stat() + if err != nil { + return nil, err + } + + // Write the metadata with the given actual tail into metadata file + // if it's non-existent. There are two possible scenarios here: + // - the freezer table is empty + // - the freezer table is legacy + // In both cases, write the meta into the file with the actual tail + // as the virtual tail. + if stat.Size() == 0 { // The file is empty + m := newMetadata(tail) + if err := writeMetadata(file, m); err != nil { + return nil, err + } + return m, nil + } + + // If the file is not empty, read the metadata from the file. + m, err := readMetadata(file) + if err != nil { + return nil, err + } + // Update the virtual tail with the given actual tail if it's even + // lower than it. Theoretically it shouldn't happen at all, print + // a warning here. + if m.VirtualTail < tail { + log.Warn("Updated virtual tail", "have", m.VirtualTail, "now", tail) + m.VirtualTail = tail + if err := writeMetadata(file, m); err != nil { + return nil, err + } + } + return m, nil +} diff --git a/core/rawdb/freezer_meta_test.go b/core/rawdb/freezer_meta_test.go new file mode 100644 index 0000000000..191744a754 --- /dev/null +++ b/core/rawdb/freezer_meta_test.go @@ -0,0 +1,61 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package rawdb + +import ( + "io/ioutil" + "os" + "testing" +) + +func TestReadWriteFreezerTableMeta(t *testing.T) { + f, err := ioutil.TempFile(os.TempDir(), "*") + if err != nil { + t.Fatalf("Failed to create file %v", err) + } + err = writeMetadata(f, newMetadata(100)) + if err != nil { + t.Fatalf("Failed to write metadata %v", err) + } + meta, err := readMetadata(f) + if err != nil { + t.Fatalf("Failed to read metadata %v", err) + } + if meta.Version != freezerVersion { + t.Fatalf("Unexpected version field") + } + if meta.VirtualTail != uint64(100) { + t.Fatalf("Unexpected virtual tail field") + } +} + +func TestInitializeFreezerTableMeta(t *testing.T) { + f, err := ioutil.TempFile(os.TempDir(), "*") + if err != nil { + t.Fatalf("Failed to create file %v", err) + } + meta, err := loadMetadata(f, uint64(100)) + if err != nil { + t.Fatalf("Failed to read metadata %v", err) + } + if meta.Version != freezerVersion { + t.Fatalf("Unexpected version field") + } + if meta.VirtualTail != uint64(100) { + t.Fatalf("Unexpected virtual tail field") + } +} diff --git a/core/rawdb/freezer_resettable.go b/core/rawdb/freezer_resettable.go new file mode 100644 index 0000000000..184e908187 --- /dev/null +++ b/core/rawdb/freezer_resettable.go @@ -0,0 +1,240 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package rawdb + +import ( + "os" + "path/filepath" + "sync" + + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +// In PBSS, this freezer is used to reverse diff +// The idea for implementing this package is to provide a freezer which supported resettable in case we need to rollback to the genesis +// Normally, TruncateTail is irreversible. This implementing will depend on "os.Rename" & "os.RemoveAll" to delete and recreate a new one from scratch. + +const tmpSuffix = ".tmp" + +// freezerOpenFunc is the function used to open/create a freezer. +type freezerOpenFunc = func() (*Freezer, error) + +// ResettableFreezer is a wrapper of the freezer which makes the +// freezer resettable. +type ResettableFreezer struct { + freezer *Freezer + opener freezerOpenFunc + datadir string + lock sync.RWMutex +} + +// NewResettableFreezer creates a resettable freezer, note freezer is +// only resettable if the passed file directory is exclusively occupied +// by the freezer. And also the user-configurable ancient root directory +// is **not** supported for reset since it might be a mount and rename +// will cause a copy of hundreds of gigabyte into local directory. It +// needs some other file based solutions. +// +// The reset function will delete directory atomically and re-create the +// freezer from scratch. +// namespace is the prefix for metrics which is not stored in freezer +func NewResettableFreezer(datadir string, namespace string, readonly bool, maxTableSize uint32, tables map[string]bool) (*ResettableFreezer, error) { + // Clean up if we figureout .tmp inside data directory + if err := cleanup(datadir); err != nil { + return nil, err + } + opener := func() (*Freezer, error) { + return NewFreezer(datadir, namespace, readonly, maxTableSize, tables) + } + freezer, err := opener() + if err != nil { + return nil, err + } + return &ResettableFreezer{ + freezer: freezer, + opener: opener, + datadir: datadir, + }, nil +} + +// Reset deletes the file directory exclusively occupied by the freezer and +// recreate the freezer from scratch. The atomicity of directory deletion +// is guaranteed by the rename operation, +func (f *ResettableFreezer) Reset() error { + f.lock.Lock() + defer f.lock.Unlock() + + // Close the freezer before deleting the directory + if err := f.freezer.Close(); err != nil { + return err + } + + tmp := tmpName(f.datadir) + if err := os.Rename(f.datadir, tmp); err != nil { + return err + } + + // the leftover directory will be cleaned up in next startup in case crash happens after rename. See in cleanup function. + if err := os.RemoveAll(tmp); err != nil { + return err + } + freezer, err := f.opener() + if err != nil { + return err + } + f.freezer = freezer + return nil +} + +// Close terminates the chain freezer, unmapping all the data files. +func (f *ResettableFreezer) Close() error { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.Close() +} + +// HasAncient returns an indicator whether the specified ancient data exists +// in the freezer +func (f *ResettableFreezer) HasAncient(kind string, number uint64) (bool, error) { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.HasAncient(kind, number) +} + +// Ancient retrieves an ancient binary blob from the append-only immutable files. +func (f *ResettableFreezer) Ancient(kind string, number uint64) ([]byte, error) { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.Ancient(kind, number) +} + +// AncientRange retrieves multiple items in sequence, starting from the index 'start'. +// It will return +// - at most 'max' items, +// - at least 1 item (even if exceeding the maxByteSize), but will otherwise +// return as many items as fit into maxByteSize +func (f *ResettableFreezer) AncientRange(kind string, start, count, maxBytes uint64) ([][]byte, error) { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.AncientRange(kind, start, count, maxBytes) +} + +// Ancients returns the length of the frozen items. +func (f *ResettableFreezer) Ancients() (uint64, error) { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.Ancients() +} + +// Tail returns the number of first stored item in the freezer. +func (f *ResettableFreezer) Tail() (uint64, error) { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.Tail() +} + +// AncientSize returns the ancient size of the specified category. +func (f *ResettableFreezer) AncientSize(kind string) (uint64, error) { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.AncientSize(kind) +} + +// ReadAncients runs the given read operation while ensuring that no writes take place +// on the underlying freezer. +func (f *ResettableFreezer) ReadAncients(fn func(ethdb.AncientReaderOp) error) (err error) { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.ReadAncients(fn) +} + +// ModifyAncients runs the given write operation. +func (f *ResettableFreezer) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (writeSize int64, err error) { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.ModifyAncients(fn) +} + +// TruncateHead discards any recent data above the provided threshold number. +func (f *ResettableFreezer) TruncateHead(items uint64) (uint64, error) { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.TruncateHead(items) +} + +// TruncateTail discards any recent data below the provided threshold number. +func (f *ResettableFreezer) TruncateTail(tail uint64) (uint64, error) { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.TruncateTail(tail) +} + +// Sync flushes all data tables to disk. +func (f *ResettableFreezer) Sync() error { + f.lock.RLock() + defer f.lock.RUnlock() + + return f.freezer.Sync() +} + +func cleanup(pathToDelete string) error { + parentDir := filepath.Dir(pathToDelete) + + // In case Parent directory does not exist, return nil, no need to cleanup. + if _, err := os.Lstat(parentDir); os.IsNotExist(err) { + return nil + } + dir, err := os.Open(parentDir) + if err != nil { + return err + } + // Read all the names of files and directories in the parent directory with single slice. + names, err := dir.Readdirnames(0) + if err != nil { + return err + } + if cerr := dir.Close(); cerr != nil { + return cerr + } + + for _, name := range names { + if name == filepath.Base(pathToDelete)+tmpSuffix { + log.Info("Removed leftover freezer directory", "name", name) + return os.RemoveAll(filepath.Join(parentDir, name)) + } + } + return nil + +} + +// /home/user/documents -> /home/user/documents.tmp (Directory) +// /home/user/documents/file.txt -> /home/user/documents/file.txt.tmp (File) +func tmpName(path string) string { + return filepath.Join(filepath.Dir(path), filepath.Base(path)+tmpSuffix) +} diff --git a/core/rawdb/freezer_resettable_test.go b/core/rawdb/freezer_resettable_test.go new file mode 100644 index 0000000000..bd7129ae84 --- /dev/null +++ b/core/rawdb/freezer_resettable_test.go @@ -0,0 +1,120 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package rawdb + +import ( + "bytes" + "fmt" + "os" + "testing" + + "github.com/ethereum/go-ethereum/ethdb" +) + +func TestResetFreezer(t *testing.T) { + items := []struct { + id uint64 + blob []byte + }{ + {0, bytes.Repeat([]byte{0}, 2048)}, + {1, bytes.Repeat([]byte{1}, 2048)}, + {2, bytes.Repeat([]byte{2}, 2048)}, + {3, bytes.Repeat([]byte{3}, 2048)}, + } + temp := t.TempDir() + f, _ := NewResettableFreezer(temp, "", false, 2048, freezerTestTableDef) + defer f.Close() + + f.ModifyAncients(func(op ethdb.AncientWriteOp) error { + for _, item := range items { + op.AppendRaw("test", item.id, item.blob) + } + return nil + }) + // Expected can get + for _, item := range items { + blob, _ := f.Ancient("test", item.id) + if !bytes.Equal(blob, item.blob) { + t.Fatalf("Failed to get the correct blob") + } + } + if _, err := os.Lstat(temp); os.IsNotExist(err) { + t.Fatal("Expected datadir should exist") + } + // Reset freezer, Expect all data is removed, and the directory is still there. + f.Reset() + count, _ := f.Ancients() + if count != 0 { + t.Fatal("Failed to reset freezer") + } + for _, item := range items { + blob, _ := f.Ancient("test", item.id) + if len(blob) != 0 { + t.Fatal("Unexpected blob") + } + } + if _, err := os.Lstat(temp); os.IsNotExist(err) { + t.Fatal("Expected datadir should exist") + } + // Fill the freezer + f.ModifyAncients(func(op ethdb.AncientWriteOp) error { + for _, item := range items { + op.AppendRaw("test", item.id, item.blob) + } + return nil + }) + for _, item := range items { + blob, _ := f.Ancient("test", item.id) + if !bytes.Equal(blob, item.blob) { + t.Fatal("Unexpected blob") + } + } +} + +func TestFreezerCleanUpWhenInit(t *testing.T) { + items := []struct { + id uint64 + blob []byte + }{ + {0, bytes.Repeat([]byte{0}, 2048)}, + {1, bytes.Repeat([]byte{1}, 2048)}, + {2, bytes.Repeat([]byte{2}, 2048)}, + {3, bytes.Repeat([]byte{3}, 2048)}, + } + // Generate a temporary directory for the freezer + datadir := t.TempDir() + // Expect nothing here. + f, _ := NewResettableFreezer(datadir, "", false, 2048, freezerTestTableDef) + // Write some data to the freezer + f.ModifyAncients(func(op ethdb.AncientWriteOp) error { + for _, item := range items { + op.AppendRaw("test", item.id, item.blob) + } + return nil + }) + f.Close() + fmt.Println(tmpName(datadir)) + os.Rename(datadir, tmpName(datadir)) + // Open the freezer again, trigger cleanup operation + f, _ = NewResettableFreezer(datadir, "", false, 2048, freezerTestTableDef) + f.Close() + + // Expected datadir.tmp should be removed + if _, err := os.Lstat(tmpName(datadir)); !os.IsNotExist(err) { + t.Fatal("Failed to cleanup leftover directory") + } +} diff --git a/core/rawdb/freezer_table.go b/core/rawdb/freezer_table.go index c6590d2442..1856873211 100644 --- a/core/rawdb/freezer_table.go +++ b/core/rawdb/freezer_table.go @@ -50,17 +50,16 @@ var ( // offset within the file to the end of the data // In serialized form, the filenum is stored as uint16. type indexEntry struct { - filenum uint32 // stored as uint16 ( 2 bytes) - offset uint32 // stored as uint32 ( 4 bytes) + filenum uint32 // stored as uint16 ( 2 bytes ) + offset uint32 // stored as uint32 ( 4 bytes ) } -const indexEntrySize = 6 +const indexEntrySize = 6 // filenum + offset // unmarshalBinary deserializes binary b into the rawIndex entry. -func (i *indexEntry) unmarshalBinary(b []byte) error { +func (i *indexEntry) unmarshalBinary(b []byte) { i.filenum = uint32(binary.BigEndian.Uint16(b[:2])) i.offset = binary.BigEndian.Uint32(b[2:6]) - return nil } // append adds the encoded entry to the end of b. @@ -92,16 +91,24 @@ type freezerTable struct { items atomic.Uint64 // Number of items stored in the table (including items removed from tail) itemOffset atomic.Uint64 // Number of items removed from the table + // itemHidden is the number of items marked as deleted. Tail deletion is + // only supported at file level which means the actual deletion will be + // delayed until the entire data file is marked as deleted. Before that + // these items will be hidden to prevent being visited again. The value + // should never be lower than itemOffset. + itemHidden atomic.Uint64 + noCompression bool // if true, disables snappy compression. Note: does not work retroactively maxFileSize uint32 // Max file size for data-files name string path string head *os.File // File descriptor for the data head of the table + index *os.File // File descriptor for the indexEntry file of the table + meta *os.File // File descriptor for the metadata file of the table files map[uint32]*os.File // open files headId uint32 // number of the currently active head file tailId uint32 // number of the earliest file - index *os.File // File descriptor for the indexEntry file of the table headBytes int64 // Number of bytes written to the head file readMeter metrics.Meter // Meter for measuring the effective amount of data read @@ -112,52 +119,14 @@ type freezerTable struct { lock sync.RWMutex // Mutex protecting the data file descriptors } -// NewFreezerTable opens the given path as a freezer table. -func NewFreezerTable(path, name string, disableSnappy bool) (*freezerTable, error) { +// newFreezerTable opens the given path as a freezer table. +func newFreezerTable(path, name string, disableSnappy bool) (*freezerTable, error) { return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, freezerTableSize, disableSnappy) } -// openFreezerFileForAppend opens a freezer table file and seeks to the end -func openFreezerFileForAppend(filename string) (*os.File, error) { - // Open the file without the O_APPEND flag - // because it has differing behaviour during Truncate operations - // on different OS's - file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644) - if err != nil { - return nil, err - } - // Seek to end for append - if _, err = file.Seek(0, io.SeekEnd); err != nil { - return nil, err - } - return file, nil -} - -// openFreezerFileForReadOnly opens a freezer table file for read only access -func openFreezerFileForReadOnly(filename string) (*os.File, error) { - return os.OpenFile(filename, os.O_RDONLY, 0644) -} - -// openFreezerFileTruncated opens a freezer table making sure it is truncated -func openFreezerFileTruncated(filename string) (*os.File, error) { - return os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) -} - -// truncateFreezerFile resizes a freezer table file and seeks to the end -func truncateFreezerFile(file *os.File, size int64) error { - if err := file.Truncate(size); err != nil { - return err - } - // Seek to end for append - if _, err := file.Seek(0, io.SeekEnd); err != nil { - return err - } - return nil -} - // newTable opens a freezer table, creating the data and index files if they are // non existent. Both files are truncated to the shortest common length to ensure -// they don't go out of sync. +// they don't go out of sync. (Table name could be bodies, receipts, etc.) func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression bool) (*freezerTable, error) { // Ensure the containing directory exists and open the indexEntry file if err := os.MkdirAll(path, 0755); err != nil { @@ -171,13 +140,24 @@ func newTable(path string, name string, readMeter metrics.Meter, writeMeter metr // Compressed idx idxName = fmt.Sprintf("%s.cidx", name) } - offsets, err := openFreezerFileForAppend(filepath.Join(path, idxName)) + var ( + err error + index *os.File + meta *os.File + ) + index, err = openFreezerFileForAppend(filepath.Join(path, idxName)) + if err != nil { + return nil, err + } + meta, err = openFreezerFileForAppend(filepath.Join(path, fmt.Sprintf("%s.meta", name))) if err != nil { return nil, err } + // Create the table and repair any past inconsistency tab := &freezerTable{ - index: offsets, + index: index, + meta: meta, files: make(map[uint32]*os.File), readMeter: readMeter, writeMeter: writeMeter, @@ -221,7 +201,9 @@ func (t *freezerTable) repair() error { } // Ensure the index is a multiple of indexEntrySize bytes if overflow := stat.Size() % indexEntrySize; overflow != 0 { - truncateFreezerFile(t.index, stat.Size()-overflow) // New file can't trigger this path + if err := truncateFreezerFile(t.index, stat.Size()-overflow); err != nil { + return err + } // New file can't trigger this path } // Retrieve the file sizes and prepare for truncation if stat, err = t.index.Stat(); err != nil { @@ -244,8 +226,26 @@ func (t *freezerTable) repair() error { t.tailId = firstIndex.filenum t.itemOffset.Store(uint64(firstIndex.offset)) - t.index.ReadAt(buffer, offsetsSize-indexEntrySize) - lastIndex.unmarshalBinary(buffer) + // Load metadata from the file + meta, err := loadMetadata(t.meta, t.itemOffset.Load()) + if err != nil { + return err + } + t.itemHidden.Store(meta.VirtualTail) + + // Read the last index, use the default value in case the freezer is empty + if offsetsSize == indexEntrySize { + lastIndex = indexEntry{filenum: t.tailId, offset: 0} + } else { + t.index.ReadAt(buffer, offsetsSize-indexEntrySize) + lastIndex.unmarshalBinary(buffer) + } + // Print an error log if the index is corrupted due to an incorrect + // last index item. While it is theoretically possible to have a zero offset + // by storing all zero-size items, it is highly unlikely to occur in practice. + if lastIndex.offset == 0 && offsetsSize/indexEntrySize > 1 { + log.Error("Corrupted index file detected", "lastOffset", lastIndex.offset, "indexes", offsetsSize/indexEntrySize) + } t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForAppend) if err != nil { return err @@ -274,9 +274,15 @@ func (t *freezerTable) repair() error { return err } offsetsSize -= indexEntrySize - t.index.ReadAt(buffer, offsetsSize-indexEntrySize) + // Read the new head index, use the default value in case + // the freezer is already empty. var newLastIndex indexEntry - newLastIndex.unmarshalBinary(buffer) + if offsetsSize == indexEntrySize { + newLastIndex = indexEntry{filenum: t.tailId, offset: 0} + } else { + t.index.ReadAt(buffer, offsetsSize-indexEntrySize) + newLastIndex.unmarshalBinary(buffer) + } // We might have slipped back into an earlier head-file here if newLastIndex.filenum != lastIndex.filenum { // Release earlier opened file @@ -302,11 +308,19 @@ func (t *freezerTable) repair() error { if err := t.head.Sync(); err != nil { return err } + if err := t.meta.Sync(); err != nil { + return err + } // Update the item and byte counters and return t.items.Store(t.itemOffset.Load() + uint64(offsetsSize/indexEntrySize-1)) // last indexEntry points to the end of the data file t.headBytes = contentSize t.headId = lastIndex.filenum + // Delete the leftover files because of head deletion + t.releaseFilesAfter(t.headId, true) + + // Delete the leftover files because of tail deletion + t.releaseFilesBefore(t.tailId, true) // Close opened files and preopen all files if err := t.preopen(); err != nil { return err @@ -333,16 +347,20 @@ func (t *freezerTable) preopen() (err error) { return err } -// truncate discards any recent data above the provided threshold number. -func (t *freezerTable) truncate(items uint64) error { +// truncateHead discards any recent data above the provided threshold number. +func (t *freezerTable) truncateHead(items uint64) error { t.lock.Lock() defer t.lock.Unlock() - // If our item count is correct, don't do anything + // Ensure the given truncate target must be within the existing range. existing := t.items.Load() if existing <= items { return nil } + // Ensure the given truncate target must be above the hidden items. + if items < t.itemHidden.Load() { + return errors.New("truncation below tail") + } // We need to truncate, save the old size for metrics tracking oldSize, err := t.sizeNolock() if err != nil { @@ -354,17 +372,28 @@ func (t *freezerTable) truncate(items uint64) error { log = t.logger.Warn // Only loud warn if we delete multiple items } log("Truncating freezer table", "items", existing, "limit", items) - if err := truncateFreezerFile(t.index, int64(items+1)*indexEntrySize); err != nil { + + // Truncate the index file first, the tail position is also considered + // when calculating the new freezer table length. + // Calculate the new expected size of the data file and truncate it + length := items - t.itemOffset.Load() + if err := truncateFreezerFile(t.index, int64(length+1)*indexEntrySize); err != nil { return err } - // Calculate the new expected size of the data file and truncate it - buffer := make([]byte, indexEntrySize) - if _, err := t.index.ReadAt(buffer, int64(items*indexEntrySize)); err != nil { + if err := t.index.Sync(); err != nil { return err } var expected indexEntry - expected.unmarshalBinary(buffer) + if length == 0 { + expected = indexEntry{filenum: t.tailId, offset: 0} + } else { + buffer := make([]byte, indexEntrySize) + if _, err := t.index.ReadAt(buffer, int64(length*indexEntrySize)); err != nil { + return err + } + expected.unmarshalBinary(buffer) + } // We might need to truncate back to older files if expected.filenum != t.headId { // If already open for reading, force-reopen for writing @@ -376,6 +405,7 @@ func (t *freezerTable) truncate(items uint64) error { // Release any files _after the current head -- both the previous head // and any files which may have been opened for reading t.releaseFilesAfter(expected.filenum, true) + // Set back the historic head t.head = newHead t.headId = expected.filenum @@ -383,6 +413,9 @@ func (t *freezerTable) truncate(items uint64) error { if err := truncateFreezerFile(t.head, int64(expected.offset)); err != nil { return err } + if err := t.head.Sync(); err != nil { + return err + } // All data files truncated, set internal counters and return t.headBytes = int64(expected.offset) t.items.Store(items) @@ -397,6 +430,123 @@ func (t *freezerTable) truncate(items uint64) error { return nil } +// truncateTail discards any recent data before the provided threshold number. +// tail -> item-offset -> item-hidden -> truncated-items -> items/head. (Valid Range). +func (t *freezerTable) truncateTail(items uint64) error { + t.lock.Lock() + defer t.lock.Unlock() + + // The truncateTarget is below the current tail, return nil, no need to truncate + + if t.itemHidden.Load() >= items { + return nil + } + // The truncateTarget is above the current head, return error + if t.items.Load() < items { + return errors.New("truncation above head") + } + + // Load the new tail index by the given new tail position + var ( + newTailId uint32 + buffer = make([]byte, indexEntrySize) + ) + + if t.items.Load() == items { + newTailId = t.headId // Truncate in the head. + } else { + // Get the index entry of the new tail position and it's it based on the file number. + offset := items - t.itemOffset.Load() + if _, err := t.index.ReadAt(buffer, int64((offset+1)*indexEntrySize)); err != nil { + return err + } + var newTailIndex indexEntry + newTailIndex.unmarshalBinary(buffer) + newTailId = newTailIndex.filenum + } + // Update the virtual tail marker and hidden these entries in table. + t.itemHidden.Store(items) + if err := writeMetadata(t.meta, newMetadata(items)); err != nil { + return err + } + // Hidden items still fall in the current tail file, no data file + // can be dropped. + if t.tailId == newTailId { + return nil + } + // Hidden items fall in the incorrect range, returns the error. + if t.tailId > newTailId { + return fmt.Errorf("invalid index, tail-file %d, item-file %d", t.tailId, newTailId) + } + // Hidden items exceed the current tail file, drop the relevant + // data files. We need to truncate, save the old size for metrics + // tracking. + oldSize, err := t.sizeNolock() + if err != nil { + return err + } + // Count how many items can be deleted from the file. + var ( + newDeleted = items + deleted = t.itemOffset.Load() + ) + for current := items - 1; current >= deleted; current -= 1 { + if _, err := t.index.ReadAt(buffer, int64((current-deleted+1)*indexEntrySize)); err != nil { + return err + } + var pre indexEntry + pre.unmarshalBinary(buffer) + if pre.filenum != newTailId { + break + } + newDeleted = current + } + // Commit the changes of metadata file first before manipulating + // the indexes file. + if err := t.meta.Sync(); err != nil { + return err + } + // Close the index file before shorten it. + if err := t.index.Close(); err != nil { + return err + } + // Truncate the deleted index entries from the index file. It overwrites the entries in current index file. + err = copyFrom(t.index.Name(), t.index.Name(), indexEntrySize*(newDeleted-deleted+1), func(f *os.File) error { + tailIndex := indexEntry{ + filenum: newTailId, + offset: uint32(newDeleted), + } + _, err := f.Write(tailIndex.append(nil)) + return err + }) + if err != nil { + return err + } + // Reopen the modified index file to load the changes + t.index, err = openFreezerFileForAppend(t.index.Name()) + if err != nil { + return err + } + // Sync the file to ensure changes are flushed to disk + if err := t.index.Sync(); err != nil { + return err + } + // Release/Delete any files before the current tail + t.tailId = newTailId + t.itemOffset.Store(newDeleted) + + // Release with removing any files before the current tailId + t.releaseFilesBefore(t.tailId, true) + + // Retrieve the new size and update the total size counter + newSize, err := t.sizeNolock() + if err != nil { + return err + } + t.sizeGauge.Dec(int64(oldSize - newSize)) + return nil +} + // Close closes all opened files. func (t *freezerTable) Close() error { t.lock.Lock() @@ -408,6 +558,11 @@ func (t *freezerTable) Close() error { } t.index = nil + if err := t.meta.Close(); err != nil { + errs = append(errs, err) + } + t.meta = nil + for _, f := range t.files { if err := f.Close(); err != nil { errs = append(errs, err) @@ -421,6 +576,19 @@ func (t *freezerTable) Close() error { return nil } +// releaseFilesBefore closes all open files with a lower number, and optionally also deletes the files +func (t *freezerTable) releaseFilesBefore(num uint32, remove bool) { + for fnum, f := range t.files { + if fnum < num { + delete(t.files, fnum) + f.Close() + if remove { + os.Remove(f.Name()) + } + } + } +} + // openFile assumes that the write-lock is held by the caller func (t *freezerTable) openFile(num uint32, opener func(string) (*os.File, error)) (f *os.File, err error) { var exist bool @@ -531,7 +699,7 @@ func (t *freezerTable) RetrieveItems(start, count, maxBytes uint64) ([][]byte, e if !t.noCompression { decompressedSize, _ = snappy.DecodedLen(item) } - if i > 0 && uint64(outputSize+decompressedSize) > maxBytes { + if i > 0 && maxBytes != 0 && uint64(outputSize+decompressedSize) > maxBytes { break } if !t.noCompression { @@ -549,44 +717,50 @@ func (t *freezerTable) RetrieveItems(start, count, maxBytes uint64) ([][]byte, e } // retrieveItems reads up to 'count' items from the table. It reads at least -// one item, but otherwise avoids reading more than maxBytes bytes. -// It returns the (potentially compressed) data, and the sizes. +// one item, but otherwise avoids reading more than maxBytes bytes. Freezer +// will ignore the size limitation and continuously allocate memory to store +// data if maxBytes is 0. It returns the (potentially compressed) data, and +// the sizes. func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []int, error) { t.lock.RLock() defer t.lock.RUnlock() // Ensure the table and the item is accessible - if t.index == nil || t.head == nil { + if t.index == nil || t.head == nil || t.meta == nil { return nil, nil, errClosed } - itemCount := t.items.Load() // max number + items := t.items.Load() // max number + hidden := t.itemHidden.Load() // Ensure the start is written, not deleted from the tail, and that the // caller actually wants something - if itemCount <= start || t.itemOffset.Load() > start || count == 0 { + if items <= start || hidden > start || count == 0 { return nil, nil, errOutOfBounds } - if start+count > itemCount { - count = itemCount - start + + if start+count > items { + count = items - start } - var ( - output = make([]byte, maxBytes) // Buffer to read data into - outputSize int // Used size of that buffer - ) + + var output []byte // Buffer to read data into + + if maxBytes != 0 { + output = make([]byte, 0, maxBytes) + } else { + output = make([]byte, 0, 1024) // initial buffer cap + } + // readData is a helper method to read a single data item from disk. readData := func(fileId, start uint32, length int) error { // In case a small limit is used, and the elements are large, may need to // realloc the read-buffer when reading the first (and only) item. - if len(output) < length { - output = make([]byte, length) - } + output = grow(output, length) dataFile, exist := t.files[fileId] if !exist { return fmt.Errorf("missing data file %d", fileId) } - if _, err := dataFile.ReadAt(output[outputSize:outputSize+length], int64(start)); err != nil { - return err + if _, err := dataFile.ReadAt(output[len(output)-length:], int64(start)); err != nil { + return fmt.Errorf("%w, fileid: %d, start: %d, length: %d", err, fileId, start, length) } - outputSize += length return nil } // Read all the indexes in one go @@ -594,6 +768,7 @@ func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []i if err != nil { return nil, nil, err } + var ( sizes []int // The sizes for each element totalSize = 0 // The total size of all data read so far @@ -617,7 +792,7 @@ func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []i } readStart = 0 } - if i > 0 && uint64(totalSize+size) > maxBytes { + if i > 0 && uint64(totalSize+size) > maxBytes && maxBytes != 0 { // About to break out due to byte limit being exceeded. We don't // read this last item, but we need to do the deferred reads now. if unreadSize > 0 { @@ -631,7 +806,7 @@ func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []i unreadSize += size totalSize += size sizes = append(sizes, size) - if i == len(indices)-2 || uint64(totalSize) > maxBytes { + if i == len(indices)-2 || (uint64(totalSize) > maxBytes && maxBytes != 0) { // Last item, need to do the read now if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil { return nil, nil, err @@ -639,13 +814,15 @@ func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []i break } } - return output[:outputSize], sizes, nil + // Update metrics. + t.readMeter.Mark(int64(totalSize)) + return output, sizes, nil } // has returns an indicator whether the specified number data // exists in the freezer table. func (t *freezerTable) has(number uint64) bool { - return t.items.Load() > number + return t.items.Load() > number && t.itemHidden.Load() <= number } // size returns the total data size in the freezer table. @@ -704,7 +881,7 @@ func (t *freezerTable) Sync() error { // DumpIndex is a debug print utility function, mainly for testing. It can also // be used to analyse a live freezer table index. -func (t *freezerTable) DumpIndex(start, stop int64) { +func (t *freezerTable) dumpIndexStdout(start, stop int64) { t.dumpIndex(os.Stdout, start, stop) } @@ -716,13 +893,20 @@ func (t *freezerTable) dumpIndexString(start, stop int64) string { } func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) { + meta, err := readMetadata(t.meta) + if err != nil { + fmt.Fprintf(w, "Failed to decode freezer table %v\n", err) + return + } + fmt.Fprintf(w, "Version %d deleted %d, hidden %d\n", meta.Version, t.itemOffset.Load(), t.itemHidden.Load()) + buf := make([]byte, indexEntrySize) fmt.Fprintf(w, "| number | fileno | offset |\n") fmt.Fprintf(w, "|--------|--------|--------|\n") for i := uint64(start); ; i++ { - if _, err := t.index.ReadAt(buf, int64(i*indexEntrySize)); err != nil { + if _, err := t.index.ReadAt(buf, int64((i+1)*indexEntrySize)); err != nil { break } var entry indexEntry diff --git a/core/rawdb/freezer_table_test.go b/core/rawdb/freezer_table_test.go index 0401b48a6d..074144a9e2 100644 --- a/core/rawdb/freezer_table_test.go +++ b/core/rawdb/freezer_table_test.go @@ -387,7 +387,7 @@ func TestFreezerTruncate(t *testing.T) { t.Fatal(err) } defer f.Close() - f.truncate(10) // 150 bytes + f.truncateHead(10) // 150 bytes if f.items.Load() != 10 { t.Fatalf("expected %d items, got %d", 10, f.items.Load()) } @@ -504,7 +504,7 @@ func TestFreezerReadAndTruncate(t *testing.T) { } // Now, truncate back to zero - f.truncate(0) + f.truncateHead(0) // Write the data again batch := f.newBatch() @@ -732,7 +732,7 @@ func TestSequentialRead(t *testing.T) { } // Write 15 bytes 30 times writeChunks(t, f, 30, 15) - f.DumpIndex(0, 30) + f.dumpIndexStdout(0, 30) f.Close() } { // Open it, iterate, verify iteration @@ -829,3 +829,49 @@ func TestSequentialReadByteLimit(t *testing.T) { } } } + +// TestSequentialReadNoByteLimit tests the batch-read if maxBytes is not specified. +// Freezer should return the requested items regardless the size limitation. +func TestSequentialReadNoByteLimit(t *testing.T) { + rm, wm, sg := metrics.NewMeter(), metrics.NewMeter(), metrics.NewGauge() + fname := fmt.Sprintf("batchread-3-%d", rand.Uint64()) + { // Fill table + f, err := newTable(os.TempDir(), fname, rm, wm, sg, 100, false) + if err != nil { + t.Fatal(err) + } + // Write 10 bytes 30 times, + // Splitting it at every 100 bytes (10 items) + writeChunks(t, f, 30, 10) + f.Close() + } + for i, tc := range []struct { + items uint64 + want int + }{ + {1, 1}, + {30, 30}, + {31, 30}, + } { + { + f, err := newTable(os.TempDir(), fname, rm, wm, sg, 100, false) + if err != nil { + t.Fatal(err) + } + items, err := f.RetrieveItems(0, tc.items, 0) + if err != nil { + t.Fatal(err) + } + if have, want := len(items), tc.want; have != want { + t.Fatalf("test %d: want %d items, have %d ", i, want, have) + } + for ii, have := range items { + want := getChunk(10, ii) + if !bytes.Equal(want, have) { + t.Fatalf("test %d: data corruption item %d: have\n%x\n, want \n%x\n", i, ii, have, want) + } + } + f.Close() + } + } +} diff --git a/core/rawdb/freezer_test.go b/core/rawdb/freezer_test.go index fa84f80306..9749e58616 100644 --- a/core/rawdb/freezer_test.go +++ b/core/rawdb/freezer_test.go @@ -115,7 +115,7 @@ func TestFreezerModifyRollback(t *testing.T) { // Reopen and check that the rolled-back data doesn't reappear. tables := map[string]bool{"test": true} - f2, err := newFreezer(dir, "", false, 2049, tables) + f2, err := NewFreezer(dir, "", false, 2049, tables) if err != nil { t.Fatalf("can't reopen freezer after failed ModifyAncients: %v", err) } @@ -186,7 +186,7 @@ func TestFreezerConcurrentModifyRetrieve(t *testing.T) { wg.Wait() } -// This test runs ModifyAncients and TruncateAncients concurrently with each other. +// This test runs ModifyAncients and TruncateHead concurrently with each other. func TestFreezerConcurrentModifyTruncate(t *testing.T) { f, dir := newFreezerForTesting(t, freezerTestTableDef) defer os.RemoveAll(dir) @@ -196,7 +196,7 @@ func TestFreezerConcurrentModifyTruncate(t *testing.T) { for i := 0; i < 1000; i++ { // First reset and write 100 items. - if err := f.TruncateAncients(0); err != nil { + if _, err := f.TruncateHead(0); err != nil { t.Fatal("truncate failed:", err) } _, err := f.ModifyAncients(func(op ethdb.AncientWriteOp) error { @@ -231,7 +231,7 @@ func TestFreezerConcurrentModifyTruncate(t *testing.T) { wg.Done() }() go func() { - truncateErr = f.TruncateAncients(10) + _, truncateErr = f.TruncateHead(10) wg.Done() }() go func() { @@ -253,7 +253,7 @@ func TestFreezerConcurrentModifyTruncate(t *testing.T) { } } -func newFreezerForTesting(t *testing.T, tables map[string]bool) (*freezer, string) { +func newFreezerForTesting(t *testing.T, tables map[string]bool) (*Freezer, string) { t.Helper() dir, err := ioutil.TempDir("", "freezer") @@ -262,7 +262,7 @@ func newFreezerForTesting(t *testing.T, tables map[string]bool) (*freezer, strin } // note: using low max table size here to ensure the tests actually // switch between multiple files. - f, err := newFreezer(dir, "", false, 2049, tables) + f, err := NewFreezer(dir, "", false, 2049, tables) if err != nil { t.Fatal("can't open freezer", err) } @@ -270,7 +270,7 @@ func newFreezerForTesting(t *testing.T, tables map[string]bool) (*freezer, strin } // checkAncientCount verifies that the freezer contains n items. -func checkAncientCount(t *testing.T, f *freezer, kind string, n uint64) { +func checkAncientCount(t *testing.T, f *Freezer, kind string, n uint64) { t.Helper() if frozen, _ := f.Ancients(); frozen != n { diff --git a/core/rawdb/freezer_utils.go b/core/rawdb/freezer_utils.go new file mode 100644 index 0000000000..d0b103fa84 --- /dev/null +++ b/core/rawdb/freezer_utils.go @@ -0,0 +1,137 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package rawdb + +import ( + "io" + "io/ioutil" + "os" + "path/filepath" +) + +// copyFrom copies data from 'srcPath' at offset 'offset' into 'destPath'. +// The 'destPath' is created if it doesn't exist, otherwise it is overwritten. +// Before the copy is executed, there is a callback can be registered to +// manipulate the dest file. +// It is perfectly valid to have destPath == srcPath. +// Those paths must be absolute path. +func copyFrom(srcPath, destPath string, offset uint64, beforeCopyFunc func(f *os.File) error) error { + // Create a temp file in the same directory where we want it to wind up + f, err := ioutil.TempFile(filepath.Dir(destPath), "*") // Create random name + if err != nil { + return err + } + + fname := f.Name() + + // Clean up the remaining file. + defer func() { + if f != nil { + f.Close() + } + os.Remove(fname) // Clean up the temp file + }() + + // Apply the beforeCopyFun , before we processing + if beforeCopyFunc != nil { + if err := beforeCopyFunc(f); err != nil { + return err + } + } + // Open the source file + + src, err := os.Open(srcPath) + if err != nil { + return err + } + // Set offset of nextRead in offset relative to origin of the file. + if _, err = src.Seek(int64(offset), 0); err != nil { + src.Close() + return err + } + + // io.Copy uses 32K buffer internally. + _, err = io.Copy(f, src) + if err != nil { + src.Close() + return err + } + // Rename the temporary file to the specified dest name. + // src may be same as dest, so needs to be closed before + // we do the final move. + src.Close() + + if err := f.Close(); err != nil { + return err + } + f = nil + return os.Rename(fname, destPath) +} + +// openFreezerFileForAppend opens a freezer table file and seeks to the end, if it's not exist, create it. +func openFreezerFileForAppend(filename string) (*os.File, error) { + // Open the file without the O_APPEND flag + // because it has differing behaviour during Truncate operations + // on different OS's + file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644) + if err != nil { + return nil, err + } + // Seek to end for append + if _, err = file.Seek(0, io.SeekEnd); err != nil { + return nil, err + } + return file, nil +} + +// openFreezerFileForReadOnly opens a freezer table file for read only access +func openFreezerFileForReadOnly(filename string) (*os.File, error) { + return os.OpenFile(filename, os.O_RDONLY, 0644) +} + +// openFreezerFileTruncated opens a freezer table making sure it is truncated +func openFreezerFileTruncated(filename string) (*os.File, error) { + return os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) +} + +// truncateFreezerFile resizes a freezer table file and seeks to the end +func truncateFreezerFile(file *os.File, size int64) error { + if err := file.Truncate(size); err != nil { + return err + } + // Seek to end for append + if _, err := file.Seek(0, io.SeekEnd); err != nil { + return err + } + return nil +} + +// grow prepares the slice space for new item, and doubles the slice capacity +// if space is not enough. +func grow(buf []byte, n int) []byte { + if cap(buf)-len(buf) < n { + newcap := 2 * cap(buf) + if newcap-len(buf) < n { + newcap = len(buf) + n + } + nbuf := make([]byte, len(buf), newcap) + copy(nbuf, buf) + buf = nbuf + } + buf = buf[:len(buf)+n] + return buf +} diff --git a/core/rawdb/freezer_utils_test.go b/core/rawdb/freezer_utils_test.go new file mode 100644 index 0000000000..445f63fb79 --- /dev/null +++ b/core/rawdb/freezer_utils_test.go @@ -0,0 +1,75 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . +package rawdb + +import ( + "bytes" + "io/ioutil" + "os" + "testing" +) + +func TestCopyFrom(t *testing.T) { + var ( + content = []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8} + prefix = []byte{0x9, 0xa, 0xb, 0xc, 0xd, 0xf} + ) + var cases = []struct { + src, dest string + offset uint64 + writePrefix bool + }{ + {"foo", "bar", 0, false}, + {"foo", "bar", 1, false}, + {"foo", "bar", 8, false}, + {"foo", "foo", 0, false}, + {"foo", "foo", 1, false}, + {"foo", "foo", 8, false}, + {"foo", "bar", 0, true}, + {"foo", "bar", 1, true}, + {"foo", "bar", 8, true}, + } + for _, c := range cases { + ioutil.WriteFile(c.src, content, 0644) + + if err := copyFrom(c.src, c.dest, c.offset, func(f *os.File) error { + if !c.writePrefix { + return nil + } + f.Write(prefix) + return nil + }); err != nil { + os.Remove(c.src) + t.Fatalf("Failed to copy %v", err) + } + + blob, err := ioutil.ReadFile(c.dest) + if err != nil { + os.Remove(c.src) + os.Remove(c.dest) + t.Fatalf("Failed to read %v", err) + } + want := content[c.offset:] + if c.writePrefix { + want = append(prefix, want...) + } + if !bytes.Equal(blob, want) { + t.Fatal("Unexpected value") + } + os.Remove(c.src) + os.Remove(c.dest) + } +} diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 16478b6923..c1ea97e163 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -22,6 +22,7 @@ import ( "encoding/binary" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/metrics" ) @@ -39,6 +40,9 @@ var ( // headFastBlockKey tracks the latest known incomplete block's hash during fast sync. headFastBlockKey = []byte("LastFast") + // persistentStateIDKey tracks the id of latest stored state(for path-based only) + persistentStateIDKey = []byte("LastStateID") + // lastPivotKey tracks the last pivot block used by fast sync (to reenable on sethead). lastPivotKey = []byte("LastPivot") @@ -63,6 +67,9 @@ var ( // snapshotSyncStatusKey tracks the snapshot sync status across restarts. snapshotSyncStatusKey = []byte("SnapshotSyncStatus") + // trieJournalKey tracks the in-memory trie node layers across restarts. + trieJournalKey = []byte("TrieJournal") + // txIndexTailKey tracks the oldest block whose transactions have been indexed. txIndexTailKey = []byte("TransactionIndexTail") @@ -83,6 +90,9 @@ var ( snapshotConsortiumPrefix = []byte("consortium-") // key = ConsortiumSnapshotPrefix + block hash + // snapSyncStatusFlagKey flags that status of snap sync. + snapSyncStatusFlagKey = []byte("SnapSyncStatus") + // Data item prefixes (use single byte to avoid mixing data types, avoid `i`, used for indexes). headerPrefix = []byte("h") // headerPrefix + num (uint64 big endian) + hash -> header headerTDSuffix = []byte("t") // headerPrefix + num (uint64 big endian) + hash + headerTDSuffix -> td @@ -103,8 +113,14 @@ var ( internalTxsPrefix = []byte("itxs") // internalTxsPrefix + block hash -> internal transactions dirtyAccountsKey = []byte("dacc") // dirtyAccountsPrefix + block hash -> dirty accounts - PreimagePrefix = []byte("secure-key-") // PreimagePrefix + hash -> preimage - configPrefix = []byte("ethereum-config-") // config prefix for the db + // Path-based storage scheme of merkle patricia trie. + TrieNodeAccountPrefix = []byte("A") // TrieNodeAccountPrefix + hexPath -> trie node + TrieNodeStoragePrefix = []byte("O") // TrieNodeStoragePrefix + accountHash + hexPath -> trie node + stateIDPrefix = []byte("L") // stateIDPrefix + state root -> state id + + PreimagePrefix = []byte("secure-key-") // PreimagePrefix + hash -> preimage + configPrefix = []byte("ethereum-config-") // config prefix for the db + genesisPrefix = []byte("ethereum-genesis-") // genesis state prefix for the db // Chain index prefixes (use `i` + single byte to avoid mixing data types). BloomBitsIndexPrefix = []byte("iB") // BloomBitsIndexPrefix is the data table of a chain indexer to track its progress @@ -113,33 +129,6 @@ var ( preimageHitCounter = metrics.NewRegisteredCounter("db/preimage/hits", nil) ) -const ( - // freezerHeaderTable indicates the name of the freezer header table. - freezerHeaderTable = "headers" - - // freezerHashTable indicates the name of the freezer canonical hash table. - freezerHashTable = "hashes" - - // freezerBodiesTable indicates the name of the freezer block body table. - freezerBodiesTable = "bodies" - - // freezerReceiptTable indicates the name of the freezer receipts table. - freezerReceiptTable = "receipts" - - // freezerDifficultyTable indicates the name of the freezer total difficulty table. - freezerDifficultyTable = "diffs" -) - -// FreezerNoSnappy configures whether compression is disabled for the ancient-tables. -// Hashes and difficulties don't compress well. -var FreezerNoSnappy = map[string]bool{ - freezerHeaderTable: false, - freezerHashTable: true, - freezerBodiesTable: false, - freezerReceiptTable: false, - freezerDifficultyTable: true, -} - // LegacyTxLookupEntry is the legacy TxLookupEntry definition with some unnecessary // fields. type LegacyTxLookupEntry struct { @@ -254,6 +243,87 @@ func configKey(hash common.Hash) []byte { return append(configPrefix, hash.Bytes()...) } +// genesisStateSpecKey = genesisPrefix + hash +func genesisStateSpecKey(hash common.Hash) []byte { + return append(genesisPrefix, hash.Bytes()...) +} + +// accountTrieNodeKey = trieNodeAccountPrefix + nodePath. +func accountTrieNodeKey(path []byte) []byte { + return append(TrieNodeAccountPrefix, path...) +} + +// storageTrieNodeKey = TrieNodeStoragePrefix + accountHash + nodePath. +func storageTrieNodeKey(accountHash common.Hash, path []byte) []byte { + return append(append(TrieNodeStoragePrefix, accountHash.Bytes()...), path...) +} + func snapshotConsortiumKey(hash common.Hash) []byte { return append(snapshotConsortiumPrefix, hash.Bytes()...) } + +// IsLegacyTrieNode reports whether a provided database entry is a legacy trie +// node. The characteristics of legacy trie node are: +// - the key length is 32 bytes +// - the key is the hash of val +func IsLegacyTrieNode(key []byte, val []byte) bool { + if len(key) != common.HashLength { + return false + } + return bytes.Equal(key, crypto.Keccak256(val)) +} + +// ResolveAccountTrieNodeKey reports whether a provided database entry is an +// account trie node in path-based state scheme, and returns the resolved +// node path if so. +func ResolveAccountTrieNodeKey(key []byte) (bool, []byte) { + if !bytes.HasPrefix(key, TrieNodeAccountPrefix) { + return false, nil + } + // The remaining key should only consist a hex node path + // whose length is in the range 0 to 64 (64 is excluded + // since leaves are always wrapped with shortNode). + if len(key) >= len(TrieNodeAccountPrefix)+common.HashLength*2 { + return false, nil + } + return true, key[len(TrieNodeAccountPrefix):] +} + +// IsAccountTrieNode reports whether a provided database entry is an account +// trie node in path-based state scheme. +func IsAccountTrieNode(key []byte) bool { + ok, _ := ResolveAccountTrieNodeKey(key) + return ok +} + +// ResolveStorageTrieNode reports whether a provided database entry is a storage +// trie node in path-based state scheme, and returns the resolved account hash +// and node path if so. +func ResolveStorageTrieNode(key []byte) (bool, common.Hash, []byte) { + if !bytes.HasPrefix(key, TrieNodeStoragePrefix) { + return false, common.Hash{}, nil + } + // The remaining key consists of 2 parts: + // - 32 bytes account hash + // - hex node path whose length is in the range 0 to 64 + if len(key) < len(TrieNodeStoragePrefix)+common.HashLength { + return false, common.Hash{}, nil + } + if len(key) >= len(TrieNodeStoragePrefix)+common.HashLength+common.HashLength*2 { + return false, common.Hash{}, nil + } + accountHash := common.BytesToHash(key[len(TrieNodeStoragePrefix) : len(TrieNodeStoragePrefix)+common.HashLength]) + return true, accountHash, key[len(TrieNodeStoragePrefix)+common.HashLength:] +} + +// IsStorageTrieNode reports whether a provided database entry is a storage +// trie node in path-based state scheme. +func IsStorageTrieNode(key []byte) bool { + ok, _, _ := ResolveStorageTrieNode(key) + return ok +} + +// stateIDKey = stateIDPrefix + root (32 bytes) +func stateIDKey(root common.Hash) []byte { + return append(stateIDPrefix, root.Bytes()...) +} diff --git a/core/rawdb/schema_test.go b/core/rawdb/schema_test.go new file mode 100644 index 0000000000..11d036756d --- /dev/null +++ b/core/rawdb/schema_test.go @@ -0,0 +1,227 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package rawdb + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +var ( + bytes4 = []byte{0x00, 0x01, 0x02, 0x03} + bytes20 = []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03} + bytes32 = []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f} + bytes63 = []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e} + bytes64 = []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f} + bytes65 = []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00} +) + +func TestIsLegacyTrieNode(t *testing.T) { + tests := []struct { + name string + inputData []byte + inputKey []byte + expected bool + }{ + { + name: "empty", + inputKey: []byte{}, + expected: false, + }, + { + name: "non-legacy (too short)", + inputKey: []byte{0x00, 0x01, 0x02, 0x03}, + expected: false, + }, + { + name: "legacy", + inputData: []byte{0x00, 0x01, 0x02, 0x03}, + inputKey: crypto.Keccak256([]byte{0x00, 0x01, 0x02, 0x03}), + expected: true, + }, + { + name: "non-legacy (too long)", + inputKey: []byte{0x00, 0x01, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00}, + expected: false, + }, + { + name: "non-legacy (key is not hash of data)", + inputData: []byte{0x00, 0x01, 0x02, 0x03}, + inputKey: crypto.Keccak256([]byte{0x00, 0x01, 0x02, 0x04}), + expected: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if actual := IsLegacyTrieNode(test.inputKey, test.inputData); actual != test.expected { + t.Errorf("expected %v, got %v", test.expected, actual) + } + }) + } +} + +func TestResolveAccountTrieNodeKey(t *testing.T) { + tests := []struct { + name string + inputKey []byte + expectedCheck bool + expectedKey []byte + }{ + { + name: "empty", + inputKey: []byte{}, + expectedCheck: false, + expectedKey: nil, + }, + { + name: "non account prefixed", + inputKey: bytes4, + expectedCheck: false, + expectedKey: nil, + }, + { + name: "storage prefixed", + inputKey: append(TrieNodeStoragePrefix, bytes4...), + expectedCheck: false, + expectedKey: nil, + }, + { + name: "account prefixed length 4", + inputKey: accountTrieNodeKey(bytes4), + expectedCheck: true, + expectedKey: bytes4, + }, + { + name: "account prefixed length 20", + inputKey: accountTrieNodeKey(bytes20), + expectedCheck: true, + expectedKey: bytes20, + }, + { + name: "account prefixed length 63", + inputKey: accountTrieNodeKey(bytes63), + expectedCheck: true, + expectedKey: bytes63, + }, + { + name: "account prefixed length 64", + inputKey: accountTrieNodeKey(bytes64), + expectedCheck: false, + expectedKey: nil, + }, + { + name: "account prefixed length 65", + inputKey: accountTrieNodeKey(bytes65), + expectedCheck: false, + expectedKey: nil, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if check, key := ResolveAccountTrieNodeKey(test.inputKey); check != test.expectedCheck || !bytes.Equal(key, test.expectedKey) { + t.Errorf("expected %v, %v, got %v, %v", test.expectedCheck, test.expectedKey, check, key) + } + }) + } +} + +func TestResolveStorageTrieNode(t *testing.T) { + tests := []struct { + name string + inputKey []byte + expectedCheck bool + expectedHash common.Hash + expectedKey []byte + }{ + { + name: "empty", + inputKey: []byte{}, + expectedCheck: false, + expectedHash: common.Hash{}, + expectedKey: nil, + }, + { + name: "non storage prefixed", + inputKey: []byte{0x00, 0x01, 0x02, 0x03}, + expectedCheck: false, + expectedHash: common.Hash{}, + expectedKey: nil, + }, + { + name: "account prefixed", + inputKey: accountTrieNodeKey(bytes4), + expectedCheck: false, + expectedHash: common.Hash{}, + expectedKey: nil, + }, + { + name: "storage prefixed hash 20 length 4", + inputKey: append(append(TrieNodeStoragePrefix, bytes20...), bytes4...), + expectedCheck: false, + expectedHash: common.Hash{}, + expectedKey: nil, + }, + { + name: "storage prefixed hash 32 length 4", + inputKey: storageTrieNodeKey(common.BytesToHash(bytes32), bytes4), + expectedCheck: true, + expectedHash: common.BytesToHash(bytes32), + expectedKey: bytes4, + }, + { + name: "storage prefixed hash 32 length 20", + inputKey: storageTrieNodeKey(common.BytesToHash(bytes20), bytes20), + expectedCheck: true, + expectedHash: common.BytesToHash(bytes20), + expectedKey: bytes20, + }, + { + name: "storage prefixed hash 32 length 63", + inputKey: storageTrieNodeKey(common.BytesToHash(bytes65), bytes63), + expectedCheck: true, + expectedHash: common.BytesToHash(bytes65), + expectedKey: bytes63, + }, + { + name: "storage prefixed hash 32 length 64", + inputKey: storageTrieNodeKey(common.BytesToHash(bytes32), bytes64), + expectedCheck: false, + expectedHash: common.Hash{}, + expectedKey: nil, + }, + { + name: "storage prefixed hash 32 length 65", + inputKey: storageTrieNodeKey(common.BytesToHash(bytes32), bytes65), + expectedCheck: false, + expectedHash: common.Hash{}, + expectedKey: nil, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if check, hash, key := ResolveStorageTrieNode(test.inputKey); check != test.expectedCheck || !bytes.Equal(key, test.expectedKey) || hash != test.expectedHash { + t.Errorf("expected %v, %v, %v, got %v, %v, %v", test.expectedCheck, test.expectedHash, test.expectedKey, check, hash, key) + } + }) + } +} diff --git a/core/rawdb/table.go b/core/rawdb/table.go index 253ed51455..73ec8416a1 100644 --- a/core/rawdb/table.go +++ b/core/rawdb/table.go @@ -68,6 +68,12 @@ func (t *table) AncientRange(kind string, start, count, maxBytes uint64) ([][]by return t.db.AncientRange(kind, start, count, maxBytes) } +// Tail is a noop passthrough that just forwards the request to the underlying +// database. +func (t *table) Tail() (uint64, error) { + return t.db.Tail() +} + // Ancients is a noop passthrough that just forwards the request to the underlying // database. func (t *table) Ancients() (uint64, error) { @@ -85,14 +91,19 @@ func (t *table) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (int64, erro return t.db.ModifyAncients(fn) } -func (t *table) ReadAncients(fn func(reader ethdb.AncientReader) error) (err error) { +func (t *table) ReadAncients(fn func(reader ethdb.AncientReaderOp) error) (err error) { return t.db.ReadAncients(fn) } -// TruncateAncients is a noop passthrough that just forwards the request to the underlying +// TruncateHead is a noop passthrough that just forwards the request to the underlying // database. -func (t *table) TruncateAncients(items uint64) error { - return t.db.TruncateAncients(items) +func (t *table) TruncateHead(items uint64) (uint64, error) { + return t.db.TruncateHead(items) +} + +// TruncateTail is a noop passthrough that just forwards the request to the underlying +func (t *table) TruncateTail(items uint64) (uint64, error) { + return t.db.TruncateTail(items) } // Sync is a noop passthrough that just forwards the request to the underlying @@ -101,6 +112,11 @@ func (t *table) Sync() error { return t.db.Sync() } +// AncientDatadir returns the ancient datadir of the underlying database. +func (t *table) AncientDatadir() (string, error) { + return t.db.AncientDatadir() +} + // Put inserts the given value into the database at a prefixed version of the // provided key. func (t *table) Put(key []byte, value []byte) error { diff --git a/core/rlp_test.go b/core/rlp_test.go index bf30eff24f..ba4a9266fd 100644 --- a/core/rlp_test.go +++ b/core/rlp_test.go @@ -28,6 +28,7 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" "golang.org/x/crypto/sha3" ) @@ -45,7 +46,7 @@ func getBlock(transactions int, uncles int, dataSize int) *types.Block { Config: params.TestChainConfig, Alloc: GenesisAlloc{address: {Balance: funds}}, } - genesis = gspec.MustCommit(db) + genesis = gspec.MustCommit(db, trie.NewDatabase(db, newDbConfig(rawdb.HashScheme))) ) // We need to generate as many blocks +1 as uncles diff --git a/core/state/database.go b/core/state/database.go index 50f96593d7..01c7c51f26 100644 --- a/core/state/database.go +++ b/core/state/database.go @@ -26,6 +26,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" lru "github.com/hashicorp/golang-lru/v2" ) @@ -43,7 +44,7 @@ type Database interface { OpenTrie(root common.Hash) (Trie, error) // OpenStorageTrie opens the storage trie of an account. - OpenStorageTrie(addrHash, root common.Hash) (Trie, error) + OpenStorageTrie(stateRoot, addrHash, root common.Hash) (Trie, error) // CopyTrie returns an independent copy of the given trie. CopyTrie(Trie) Trie @@ -88,13 +89,15 @@ type Trie interface { // can be used even if the trie doesn't have one. Hash() common.Hash - // Commit writes all nodes to the trie's memory database, tracking the internal - // and external (for account tries) references. - Commit(onleaf trie.LeafCallback) (common.Hash, int, error) + // Commit collects all dirty nodes in the trie and replace them with the + // corresponding node hash. All collected nodes(including dirty leaves if + // collectLeaf is true) will be encapsulated into a nodeset for return. + // The returned nodeset can be nil if the trie is clean(nothing to commit). + Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet, error) // NodeIterator returns an iterator that returns nodes of the trie. Iteration // starts at the key after the given start key. - NodeIterator(startKey []byte) trie.NodeIterator + NodeIterator(startKey []byte) (trie.NodeIterator, error) // Prove constructs a Merkle proof for key. The result contains all encoded nodes // on the path to the value at key. The value itself is also included in the last @@ -119,21 +122,31 @@ func NewDatabase(db ethdb.Database) Database { func NewDatabaseWithConfig(db ethdb.Database, config *trie.Config) Database { csc, _ := lru.New[common.Hash, int](codeSizeCacheSize) return &cachingDB{ - db: trie.NewDatabaseWithConfig(db, config), + triedb: trie.NewDatabase(db, config), + codeSizeCache: csc, + codeCache: fastcache.New(codeCacheSize), + } +} + +// NewDatabaseWithNodeDB creates a state database with an already initialized node database. +func NewDatabaseWithNodeDB(db ethdb.Database, triedb *trie.Database) Database { + csc, _ := lru.New[common.Hash, int](codeSizeCacheSize) + return &cachingDB{ + triedb: triedb, codeSizeCache: csc, codeCache: fastcache.New(codeCacheSize), } } type cachingDB struct { - db *trie.Database + triedb *trie.Database codeSizeCache *lru.Cache[common.Hash, int] codeCache *fastcache.Cache } // OpenTrie opens the main account trie at a specific root hash. func (db *cachingDB) OpenTrie(root common.Hash) (Trie, error) { - tr, err := trie.NewSecure(root, db.db) + tr, err := trie.NewSecure(trie.StateTrieID(root), db.triedb) if err != nil { return nil, err } @@ -141,8 +154,8 @@ func (db *cachingDB) OpenTrie(root common.Hash) (Trie, error) { } // OpenStorageTrie opens the storage trie of an account. -func (db *cachingDB) OpenStorageTrie(addrHash, root common.Hash) (Trie, error) { - tr, err := trie.NewSecure(root, db.db) +func (db *cachingDB) OpenStorageTrie(stateRoot, addrHash, root common.Hash) (Trie, error) { + tr, err := trie.NewSecure(trie.StorageTrieID(stateRoot, addrHash, root), db.triedb) if err != nil { return nil, err } @@ -164,7 +177,7 @@ func (db *cachingDB) ContractCode(addrHash, codeHash common.Hash) ([]byte, error if code := db.codeCache.Get(nil, codeHash.Bytes()); len(code) > 0 { return code, nil } - code := rawdb.ReadCode(db.db.DiskDB(), codeHash) + code := rawdb.ReadCode(db.triedb.DiskDB(), codeHash) if len(code) > 0 { db.codeCache.Set(codeHash.Bytes(), code) db.codeSizeCache.Add(codeHash, len(code)) @@ -180,7 +193,7 @@ func (db *cachingDB) ContractCodeWithPrefix(addrHash, codeHash common.Hash) ([]b if code := db.codeCache.Get(nil, codeHash.Bytes()); len(code) > 0 { return code, nil } - code := rawdb.ReadCodeWithPrefix(db.db.DiskDB(), codeHash) + code := rawdb.ReadCodeWithPrefix(db.triedb.DiskDB(), codeHash) if len(code) > 0 { db.codeCache.Set(codeHash.Bytes(), code) db.codeSizeCache.Add(codeHash, len(code)) @@ -200,5 +213,5 @@ func (db *cachingDB) ContractCodeSize(addrHash, codeHash common.Hash) (int, erro // TrieDB retrieves any intermediate trie-node caching layer. func (db *cachingDB) TrieDB() *trie.Database { - return db.db + return db.triedb } diff --git a/core/state/dump.go b/core/state/dump.go index bfcc035435..e9891960f9 100644 --- a/core/state/dump.go +++ b/core/state/dump.go @@ -138,8 +138,11 @@ func (s *StateDB) DumpToCollector(c DumpCollector, conf *DumpConfig) (nextKey [] ) log.Info("Trie dumping started", "root", s.trie.Hash()) c.OnRoot(s.trie.Hash()) - - it := trie.NewIterator(s.trie.NodeIterator(conf.Start)) + trieIt, err := s.trie.NodeIterator(conf.Start) + if err != nil { + return nil + } + it := trie.NewIterator(trieIt) for it.Next() { var data types.StateAccount if err := rlp.DecodeBytes(it.Value, &data); err != nil { @@ -162,13 +165,19 @@ func (s *StateDB) DumpToCollector(c DumpCollector, conf *DumpConfig) (nextKey [] account.SecureKey = it.Key } addr := common.BytesToAddress(addrBytes) - obj := newObject(s, addr, data) + obj := newObject(s, addr, &data) if !conf.SkipCode { - account.Code = obj.Code(s.db) + account.Code = obj.Code() } + if !conf.SkipStorage { account.Storage = make(map[common.Hash]string) - storageIt := trie.NewIterator(obj.getTrie(s.db).NodeIterator(nil)) + trieIt, err := obj.getTrie().NodeIterator(nil) + if err != nil { + log.Error("Failed to create trie iterator", "err", err) + continue + } + storageIt := trie.NewIterator(trieIt) for storageIt.Next() { _, content, _, err := rlp.Split(storageIt.Value) if err != nil { diff --git a/core/state/iterator.go b/core/state/iterator.go index 611df52431..653d566f85 100644 --- a/core/state/iterator.go +++ b/core/state/iterator.go @@ -74,8 +74,12 @@ func (it *NodeIterator) step() error { return nil } // Initialize the iterator if we've just started + var err error if it.stateIt == nil { - it.stateIt = it.state.trie.NodeIterator(nil) + it.stateIt, err = it.state.trie.NodeIterator(nil) + if err != nil { + return err + } } // If we had data nodes previously, we surely have at least state nodes if it.dataIt != nil { @@ -109,11 +113,14 @@ func (it *NodeIterator) step() error { if err := rlp.Decode(bytes.NewReader(it.stateIt.LeafBlob()), &account); err != nil { return err } - dataTrie, err := it.state.db.OpenStorageTrie(common.BytesToHash(it.stateIt.LeafKey()), account.Root) + dataTrie, err := it.state.db.OpenStorageTrie(it.state.originalRoot, common.BytesToHash(it.stateIt.LeafKey()), account.Root) + if err != nil { + return err + } + it.dataIt, err = dataTrie.NodeIterator(nil) if err != nil { return err } - it.dataIt = dataTrie.NodeIterator(nil) if !it.dataIt.Next(true) { it.dataIt = nil } diff --git a/core/state/iterator_test.go b/core/state/iterator_test.go index d1afe9ca3e..2419c9175c 100644 --- a/core/state/iterator_test.go +++ b/core/state/iterator_test.go @@ -17,20 +17,25 @@ package state import ( - "bytes" "testing" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" ) // Tests that the node iterator indeed walks over the entire database contents. func TestNodeIteratorCoverage(t *testing.T) { + testNodeIteratorCoverage(t, rawdb.HashScheme) + testNodeIteratorCoverage(t, rawdb.PathScheme) +} + +func testNodeIteratorCoverage(t *testing.T, scheme string) { // Create some arbitrary test state to iterate - db, root, _ := makeTestState() - db.TrieDB().Commit(root, false, nil) + db, sdb, ndb, root, _ := makeTestState(scheme) + ndb.Commit(root, false) - state, err := New(root, db, nil) + state, err := New(root, sdb, nil) if err != nil { t.Fatalf("failed to create state trie at %x: %v", root, err) } @@ -41,29 +46,63 @@ func TestNodeIteratorCoverage(t *testing.T) { hashes[it.Hash] = struct{}{} } } + // Check in-disk nodes + var ( + seenNodes = make(map[common.Hash]struct{}) + seenCodes = make(map[common.Hash]struct{}) + ) + it := db.NewIterator(nil, nil) + for it.Next() { + ok, hash := isTrieNode(scheme, it.Key(), it.Value()) + if !ok { + continue + } + seenNodes[hash] = struct{}{} + } + it.Release() + + // Check in-disk codes + it = db.NewIterator(nil, nil) + for it.Next() { + ok, hash := rawdb.IsCodeKey(it.Key()) + if !ok { + continue + } + if _, ok := hashes[common.BytesToHash(hash)]; !ok { + t.Errorf("state entry not reported %x", it.Key()) + } + seenCodes[common.BytesToHash(hash)] = struct{}{} + } + it.Release() + // Cross check the iterated hashes and the database/nodepool content for hash := range hashes { - if _, err = db.TrieDB().Node(hash); err != nil { - _, err = db.ContractCode(common.Hash{}, hash) + _, ok := seenNodes[hash] + if !ok { + _, ok = seenCodes[hash] } - if err != nil { + if !ok { t.Errorf("failed to retrieve reported node %x", hash) } } - for _, hash := range db.TrieDB().Nodes() { - if _, ok := hashes[hash]; !ok { - t.Errorf("state entry not reported %x", hash) +} + +// isTrieNode is a helper function which reports if the provided +// database entry belongs to a trie node or not. +func isTrieNode(scheme string, key, val []byte) (bool, common.Hash) { + if scheme == rawdb.HashScheme { + if rawdb.IsLegacyTrieNode(key, val) { + return true, common.BytesToHash(key) } - } - it := db.TrieDB().DiskDB().(ethdb.Database).NewIterator(nil, nil) - for it.Next() { - key := it.Key() - if bytes.HasPrefix(key, []byte("secure-key-")) { - continue + } else { + ok := rawdb.IsAccountTrieNode(key) + if ok { + return true, crypto.Keccak256Hash(val) } - if _, ok := hashes[common.BytesToHash(key)]; !ok { - t.Errorf("state entry not reported %x", key) + ok = rawdb.IsStorageTrieNode(key) + if ok { + return true, crypto.Keccak256Hash(val) } } - it.Release() + return false, common.Hash{} } diff --git a/core/state/journal.go b/core/state/journal.go index 20f18fb981..99ac806c1c 100644 --- a/core/state/journal.go +++ b/core/state/journal.go @@ -90,8 +90,18 @@ type ( account *common.Address } resetObjectChange struct { + account *common.Address prev *stateObject prevdestruct bool + + // tracking previous states of accounts and storages in snapshot, before each transaction + prevAccount []byte + prevStorage map[common.Hash][]byte + + // tracking previous states of accounts and storages in trie, before each commit + prevAccountOriginExist bool + prevAccountOrigin []byte + prevStorageOrigin map[common.Hash][]byte } selfDestructChange struct { account *common.Address @@ -157,12 +167,24 @@ func (ch createObjectChange) dirtied() *common.Address { func (ch resetObjectChange) revert(s *StateDB) { s.setStateObject(ch.prev) if !ch.prevdestruct && s.snap != nil { - delete(s.snapDestructs, ch.prev.addrHash) + delete(s.stateObjectsDestruct, ch.prev.address) + } + if ch.prevAccountOriginExist { + s.accountsOrigin[ch.prev.address] = ch.prevAccountOrigin + } + if ch.prevStorageOrigin != nil { + s.storagesOrigin[ch.prev.address] = ch.prevStorageOrigin + } + if ch.prevAccount != nil { + s.accounts[ch.prev.addrHash] = ch.prevAccount + } + if ch.prevStorage != nil { + s.storages[ch.prev.addrHash] = ch.prevStorage } } func (ch resetObjectChange) dirtied() *common.Address { - return nil + return ch.account } func (ch selfDestructChange) revert(s *StateDB) { diff --git a/core/state/metrics.go b/core/state/metrics.go index 7b40ff37af..64c651461e 100644 --- a/core/state/metrics.go +++ b/core/state/metrics.go @@ -19,10 +19,19 @@ package state import "github.com/ethereum/go-ethereum/metrics" var ( - accountUpdatedMeter = metrics.NewRegisteredMeter("state/update/account", nil) - storageUpdatedMeter = metrics.NewRegisteredMeter("state/update/storage", nil) - accountDeletedMeter = metrics.NewRegisteredMeter("state/delete/account", nil) - storageDeletedMeter = metrics.NewRegisteredMeter("state/delete/storage", nil) - accountCommittedMeter = metrics.NewRegisteredMeter("state/commit/account", nil) - storageCommittedMeter = metrics.NewRegisteredMeter("state/commit/storage", nil) + accountUpdatedMeter = metrics.NewRegisteredMeter("state/update/account", nil) + storageUpdatedMeter = metrics.NewRegisteredMeter("state/update/storage", nil) + accountDeletedMeter = metrics.NewRegisteredMeter("state/delete/account", nil) + storageDeletedMeter = metrics.NewRegisteredMeter("state/delete/storage", nil) + accountTrieUpdatedMeter = metrics.NewRegisteredMeter("state/update/accountnodes", nil) + storageTriesUpdatedMeter = metrics.NewRegisteredMeter("state/update/storagenodes", nil) + accountTrieDeletedMeter = metrics.NewRegisteredMeter("state/delete/accountnodes", nil) + storageTriesDeletedMeter = metrics.NewRegisteredMeter("state/delete/storagenodes", nil) + + slotDeletionMaxCount = metrics.NewRegisteredGauge("state/delete/storage/max/slot", nil) + slotDeletionMaxSize = metrics.NewRegisteredGauge("state/delete/storage/max/size", nil) + slotDeletionTimer = metrics.NewRegisteredResettingTimer("state/delete/storage/timer", nil) + slotDeletionCount = metrics.NewRegisteredMeter("state/delete/storage/slot", nil) + slotDeletionSize = metrics.NewRegisteredMeter("state/delete/storage/size", nil) + slotDeletionSkip = metrics.NewRegisteredGauge("state/delete/storage/skip", nil) ) diff --git a/core/state/pruner/pruner.go b/core/state/pruner/pruner.go index 37772ca35c..05825cc3ad 100644 --- a/core/state/pruner/pruner.go +++ b/core/state/pruner/pruner.go @@ -66,30 +66,31 @@ var ( // Pruner is an offline tool to prune the stale state with the // help of the snapshot. The workflow of pruner is very simple: // -// - iterate the snapshot, reconstruct the relevant state -// - iterate the database, delete all other state entries which -// don't belong to the target state and the genesis state +// - iterate the snapshot, reconstruct the relevant state +// - iterate the database, delete all other state entries which +// don't belong to the target state and the genesis state // // It can take several hours(around 2 hours for mainnet) to finish // the whole pruning work. It's recommended to run this offline tool // periodically in order to release the disk usage and improve the // disk read performance to some extent. type Pruner struct { - db ethdb.Database - stateBloom *stateBloom - datadir string - trieCachePath string - headHeader *types.Header - snaptree *snapshot.Tree + db ethdb.Database + stateBloom *stateBloom + datadir string + headHeader *types.Header + snaptree *snapshot.Tree } // NewPruner creates the pruner instance. -func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize uint64) (*Pruner, error) { +func NewPruner(db ethdb.Database, datadir string, bloomSize uint64) (*Pruner, error) { headBlock := rawdb.ReadHeadBlock(db) if headBlock == nil { return nil, errors.New("Failed to load head block") } - snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headBlock.Root(), false, false, false) + // Offline pruning is only supported in legacy hash based scheme. + triedb := trie.NewDatabase(db, trie.HashDefaults) + snaptree, err := snapshot.New(db, triedb, 256, headBlock.Root(), false, false, false) if err != nil { return nil, err // The relevant snapshot(s) might not exist } @@ -103,12 +104,11 @@ func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize uint6 return nil, err } return &Pruner{ - db: db, - stateBloom: stateBloom, - datadir: datadir, - trieCachePath: trieCachePath, - headHeader: headBlock.Header(), - snaptree: snaptree, + db: db, + stateBloom: stateBloom, + datadir: datadir, + headHeader: headBlock.Header(), + snaptree: snaptree, }, nil } @@ -241,7 +241,7 @@ func (p *Pruner) Prune(root common.Hash) error { return err } if stateBloomRoot != (common.Hash{}) { - return RecoverPruning(p.datadir, p.db, p.trieCachePath) + return RecoverPruning(p.datadir, p.db) } // If the target state root is not specified, use the HEAD-127 as the // target. The reason for picking it is: @@ -265,7 +265,7 @@ func (p *Pruner) Prune(root common.Hash) error { // Ensure the root is really present. The weak assumption // is the presence of root can indicate the presence of the // entire trie. - if blob := rawdb.ReadTrieNode(p.db, root); len(blob) == 0 { + if !rawdb.HasLegacyTrieNode(p.db, root) { // The special case is for clique based networks(rinkeby, goerli // and some other private networks), it's possible that two // consecutive blocks will have same root. In this case snapshot @@ -279,7 +279,7 @@ func (p *Pruner) Prune(root common.Hash) error { // as the pruning target. var found bool for i := len(layers) - 2; i >= 2; i-- { - if blob := rawdb.ReadTrieNode(p.db, layers[i].Root()); len(blob) != 0 { + if !rawdb.HasLegacyTrieNode(p.db, layers[i].Root()) { root = layers[i].Root() found = true log.Info("Selecting middle-layer as the pruning target", "root", root, "depth", i) @@ -299,11 +299,6 @@ func (p *Pruner) Prune(root common.Hash) error { log.Info("Selecting user-specified state as the pruning target", "root", root) } } - // Before start the pruning, delete the clean trie cache first. - // It's necessary otherwise in the next restart we will hit the - // deleted state root in the "clean cache" so that the incomplete - // state is picked for usage. - deleteCleanTrieCache(p.trieCachePath) // All the state roots of the middle layer should be forcibly pruned, // otherwise the dangling state will be left. @@ -342,7 +337,7 @@ func (p *Pruner) Prune(root common.Hash) error { // pruning can be resumed. What's more if the bloom filter is constructed, the // pruning **has to be resumed**. Otherwise a lot of dangling nodes may be left // in the disk. -func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) error { +func RecoverPruning(datadir string, db ethdb.Database) error { stateBloomPath, stateBloomRoot, err := findBloomFilter(datadir) if err != nil { return err @@ -362,7 +357,9 @@ func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) err // - The state HEAD is rewound already because of multiple incomplete `prune-state` // In this case, even the state HEAD is not exactly matched with snapshot, it // still feasible to recover the pruning correctly. - snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headBlock.Root(), false, false, true) + // Offline pruning is only supported in legacy hash based scheme. + triedb := trie.NewDatabase(db, trie.HashDefaults) + snaptree, err := snapshot.New(db, triedb, 256, headBlock.Root(), false, false, true) if err != nil { return err // The relevant snapshot(s) might not exist } @@ -372,12 +369,6 @@ func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) err } log.Info("Loaded state bloom filter", "path", stateBloomPath) - // Before start the pruning, delete the clean trie cache first. - // It's necessary otherwise in the next restart we will hit the - // deleted state root in the "clean cache" so that the incomplete - // state is picked for usage. - deleteCleanTrieCache(trieCachePath) - // All the state roots of the middle layers should be forcibly pruned, // otherwise the dangling state will be left. var ( @@ -410,11 +401,14 @@ func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error { if genesis == nil { return errors.New("missing genesis block") } - t, err := trie.NewSecure(genesis.Root(), trie.NewDatabase(db)) + t, err := trie.NewSecure(trie.StateTrieID(genesis.Root()), trie.NewDatabase(db, trie.HashDefaults)) + if err != nil { + return err + } + accIter, err := t.NodeIterator(nil) if err != nil { return err } - accIter := t.NodeIterator(nil) for accIter.Next(true) { hash := accIter.Hash() @@ -430,11 +424,14 @@ func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error { return err } if acc.Root != emptyRoot { - storageTrie, err := trie.NewSecure(acc.Root, trie.NewDatabase(db)) + storageTrie, err := trie.NewSecure(trie.StorageTrieID(genesis.Root(), common.BytesToHash(accIter.LeafKey()), acc.Root), trie.NewDatabase(db, trie.HashDefaults)) + if err != nil { + return err + } + storageIter, err := storageTrie.NodeIterator(nil) if err != nil { return err } - storageIter := storageTrie.NodeIterator(nil) for storageIter.Next(true) { hash := storageIter.Hash() if hash != (common.Hash{}) { diff --git a/core/state/snapshot/account.go b/core/state/snapshot/account.go deleted file mode 100644 index b92e942950..0000000000 --- a/core/state/snapshot/account.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2019 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package snapshot - -import ( - "bytes" - "math/big" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/rlp" -) - -// Account is a modified version of a state.Account, where the root is replaced -// with a byte slice. This format can be used to represent full-consensus format -// or slim-snapshot format which replaces the empty root and code hash as nil -// byte slice. -type Account struct { - Nonce uint64 - Balance *big.Int - Root []byte - CodeHash []byte -} - -// SlimAccount converts a state.Account content into a slim snapshot account -func SlimAccount(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) Account { - slim := Account{ - Nonce: nonce, - Balance: balance, - } - if root != emptyRoot { - slim.Root = root[:] - } - if !bytes.Equal(codehash, emptyCode[:]) { - slim.CodeHash = codehash - } - return slim -} - -// SlimAccountRLP converts a state.Account content into a slim snapshot -// version RLP encoded. -func SlimAccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) []byte { - data, err := rlp.EncodeToBytes(SlimAccount(nonce, balance, root, codehash)) - if err != nil { - panic(err) - } - return data -} - -// FullAccount decodes the data on the 'slim RLP' format and return -// the consensus format account. -func FullAccount(data []byte) (Account, error) { - var account Account - if err := rlp.DecodeBytes(data, &account); err != nil { - return Account{}, err - } - if len(account.Root) == 0 { - account.Root = emptyRoot[:] - } - if len(account.CodeHash) == 0 { - account.CodeHash = emptyCode[:] - } - return account, nil -} - -// FullAccountRLP converts data on the 'slim RLP' format into the full RLP-format. -func FullAccountRLP(data []byte) ([]byte, error) { - account, err := FullAccount(data) - if err != nil { - return nil, err - } - return rlp.EncodeToBytes(account) -} diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go index f70cbf1e68..03f6466f49 100644 --- a/core/state/snapshot/conversion.go +++ b/core/state/snapshot/conversion.go @@ -17,7 +17,6 @@ package snapshot import ( - "bytes" "encoding/binary" "errors" "fmt" @@ -28,6 +27,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" @@ -43,7 +43,7 @@ type trieKV struct { type ( // trieGeneratorFn is the interface of trie generation which can // be implemented by different trie algorithm. - trieGeneratorFn func(db ethdb.KeyValueWriter, in chan (trieKV), out chan (common.Hash)) + trieGeneratorFn func(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan (trieKV), out chan (common.Hash)) // leafCallbackFn is the callback invoked at the leaves of the trie, // returns the subtrie root with the specified subtrie identifier. @@ -52,12 +52,12 @@ type ( // GenerateAccountTrieRoot takes an account iterator and reproduces the root hash. func GenerateAccountTrieRoot(it AccountIterator) (common.Hash, error) { - return generateTrieRoot(nil, it, common.Hash{}, stackTrieGenerate, nil, newGenerateStats(), true) + return generateTrieRoot(nil, "", it, common.Hash{}, stackTrieGenerate, nil, newGenerateStats(), true) } // GenerateStorageTrieRoot takes a storage iterator and reproduces the root hash. func GenerateStorageTrieRoot(account common.Hash, it StorageIterator) (common.Hash, error) { - return generateTrieRoot(nil, it, account, stackTrieGenerate, nil, newGenerateStats(), true) + return generateTrieRoot(nil, "", it, account, stackTrieGenerate, nil, newGenerateStats(), true) } // GenerateTrie takes the whole snapshot tree as the input, traverses all the @@ -71,7 +71,8 @@ func GenerateTrie(snaptree *Tree, root common.Hash, src ethdb.Database, dst ethd } defer acctIt.Release() - got, err := generateTrieRoot(dst, acctIt, common.Hash{}, stackTrieGenerate, func(dst ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) { + scheme := snaptree.triedb.Scheme() + got, err := generateTrieRoot(dst, scheme, acctIt, common.Hash{}, stackTrieGenerate, func(dst ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) { // Migrate the code first, commit the contract code into the tmp db. if codeHash != emptyCode { code := rawdb.ReadCode(src, codeHash) @@ -87,7 +88,7 @@ func GenerateTrie(snaptree *Tree, root common.Hash, src ethdb.Database, dst ethd } defer storageIt.Release() - hash, err := generateTrieRoot(dst, storageIt, accountHash, stackTrieGenerate, nil, stat, false) + hash, err := generateTrieRoot(dst, scheme, storageIt, accountHash, stackTrieGenerate, nil, stat, false) if err != nil { return common.Hash{}, err } @@ -242,7 +243,7 @@ func runReport(stats *generateStats, stop chan bool) { // generateTrieRoot generates the trie hash based on the snapshot iterator. // It can be used for generating account trie, storage trie or even the // whole state which connects the accounts and the corresponding storages. -func generateTrieRoot(db ethdb.KeyValueWriter, it Iterator, account common.Hash, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) { +func generateTrieRoot(db ethdb.KeyValueWriter, scheme string, it Iterator, account common.Hash, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) { var ( in = make(chan trieKV) // chan to pass leaves out = make(chan common.Hash, 1) // chan to collect result @@ -253,7 +254,7 @@ func generateTrieRoot(db ethdb.KeyValueWriter, it Iterator, account common.Hash, wg.Add(1) go func() { defer wg.Done() - generatorFn(db, in, out) + generatorFn(db, scheme, account, in, out) }() // Spin up a go-routine for progress logging if report && stats != nil { @@ -299,7 +300,7 @@ func generateTrieRoot(db ethdb.KeyValueWriter, it Iterator, account common.Hash, fullData []byte ) if leafCallback == nil { - fullData, err = FullAccountRLP(it.(AccountIterator).Account()) + fullData, err = types.FullAccountRLP(it.(AccountIterator).Account()) if err != nil { return stop(err) } @@ -311,7 +312,7 @@ func generateTrieRoot(db ethdb.KeyValueWriter, it Iterator, account common.Hash, return stop(err) } // Fetch the next account and process it concurrently - account, err := FullAccount(it.(AccountIterator).Account()) + account, err := types.FullAccount(it.(AccountIterator).Account()) if err != nil { return stop(err) } @@ -321,7 +322,7 @@ func generateTrieRoot(db ethdb.KeyValueWriter, it Iterator, account common.Hash, results <- err return } - if !bytes.Equal(account.Root, subroot.Bytes()) { + if account.Root != subroot { results <- fmt.Errorf("invalid subroot(path %x), want %x, have %x", hash, account.Root, subroot) return } @@ -360,16 +361,18 @@ func generateTrieRoot(db ethdb.KeyValueWriter, it Iterator, account common.Hash, return stop(nil) } -func stackTrieGenerate(db ethdb.KeyValueWriter, in chan trieKV, out chan common.Hash) { - t := trie.NewStackTrie(db) +func stackTrieGenerate(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan trieKV, out chan common.Hash) { + + options := trie.NewStackTrieOptions() + // Implement nodeWriter in case db is existed otherwise let it be nil. + if db != nil { + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(db, owner, path, hash, blob, scheme) + }) + } + t := trie.NewStackTrie(options) for leaf := range in { t.TryUpdate(leaf.key[:], leaf.value) } - var root common.Hash - if db == nil { - root = t.Hash() - } else { - root, _ = t.Commit() - } - out <- root + out <- t.Commit() } diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 2d69c33355..2409ae1422 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -27,6 +27,7 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/rlp" bloomfilter "github.com/holiman/bloomfilter/v2" ) @@ -269,7 +270,7 @@ func (dl *diffLayer) Stale() bool { // Account directly retrieves the account associated with a particular hash in // the snapshot slim data format. -func (dl *diffLayer) Account(hash common.Hash) (*Account, error) { +func (dl *diffLayer) Account(hash common.Hash) (*types.SlimAccount, error) { data, err := dl.AccountRLP(hash) if err != nil { return nil, err @@ -277,7 +278,7 @@ func (dl *diffLayer) Account(hash common.Hash) (*Account, error) { if len(data) == 0 { // can be both nil and []byte{} return nil, nil } - account := new(Account) + account := new(types.SlimAccount) if err := rlp.DecodeBytes(data, account); err != nil { panic(err) } diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index 7cbf6e293d..513f0f5aba 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -23,6 +23,7 @@ import ( "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" @@ -65,7 +66,7 @@ func (dl *diskLayer) Stale() bool { // Account directly retrieves the account associated with a particular hash in // the snapshot slim data format. -func (dl *diskLayer) Account(hash common.Hash) (*Account, error) { +func (dl *diskLayer) Account(hash common.Hash) (*types.SlimAccount, error) { data, err := dl.AccountRLP(hash) if err != nil { return nil, err @@ -73,7 +74,7 @@ func (dl *diskLayer) Account(hash common.Hash) (*Account, error) { if len(data) == 0 { // can be both nil and []byte{} return nil, nil } - account := new(Account) + account := new(types.SlimAccount) if err := rlp.DecodeBytes(data, account); err != nil { panic(err) } diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 9d74ca4d9b..d8ecba8746 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -21,7 +21,6 @@ import ( "encoding/binary" "errors" "fmt" - "math/big" "time" "github.com/VictoriaMetrics/fastcache" @@ -29,13 +28,14 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/common/math" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" - "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) var ( @@ -248,7 +248,7 @@ func (result *proofResult) forEach(callback func(key []byte, val []byte) error) // // The proof result will be returned if the range proving is finished, otherwise // the error will be returned to abort the entire procedure. -func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) { +func (dl *diskLayer) proveRange(stats *generatorStats, trieID *trie.ID, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) { var ( keys [][]byte vals [][]byte @@ -305,6 +305,7 @@ func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix }(time.Now()) // The snap state is exhausted, pass the entire key/val set for verification + root := trieID.Root if origin == nil && !diskMore { stackTr := trie.NewStackTrie(nil) for i, key := range keys { @@ -320,7 +321,7 @@ func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix return &proofResult{keys: keys, vals: vals}, nil } // Snap state is chunked, generate edge proofs for verification. - tr, err := trie.New(root, dl.triedb) + tr, err := trie.New(trieID, dl.triedb) if err != nil { stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker) return nil, errMissingTrie @@ -381,9 +382,9 @@ type onStateCallback func(key []byte, val []byte, write bool, delete bool) error // generateRange generates the state segment with particular prefix. Generation can // either verify the correctness of existing state through rangeproof and skip // generation, or iterate trie to regenerate state on demand. -func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState onStateCallback, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) { +func (dl *diskLayer) generateRange(trieID *trie.ID, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState onStateCallback, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) { // Use range prover to check the validity of the flat state in the range - result, err := dl.proveRange(stats, root, prefix, kind, origin, max, valueConvertFn) + result, err := dl.proveRange(stats, trieID, prefix, kind, origin, max, valueConvertFn) if err != nil { return false, nil, err } @@ -428,20 +429,29 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, // We use the snap data to build up a cache which can be used by the // main account trie as a primary lookup when resolving hashes - var snapNodeCache ethdb.KeyValueStore + var resolver trie.NodeResolver if len(result.keys) > 0 { - snapNodeCache = memorydb.New() - snapTrieDb := trie.NewDatabase(snapNodeCache) - snapTrie, _ := trie.New(common.Hash{}, snapTrieDb) + mdb := rawdb.NewMemoryDatabase() + tdb := trie.NewDatabase(mdb, trie.HashDefaults) + snapTrie := trie.NewEmpty(tdb) + defer tdb.Close() for i, key := range result.keys { snapTrie.Update(key, result.vals[i]) } - root, _, _ := snapTrie.Commit(nil) - snapTrieDb.Commit(root, false, nil) + root, nodes, err := snapTrie.Commit(false) + + if err != nil && nodes != nil { + tdb.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + tdb.Commit(root, false) + } + resolver = func(owner common.Hash, path []byte, hash common.Hash) []byte { + return rawdb.ReadTrieNode(mdb, owner, path, hash, tdb.Scheme()) // Read the TrieNode based on scheme + } } + tr := result.tr if tr == nil { - tr, err = trie.New(root, dl.triedb) + tr, err = trie.New(trieID, dl.triedb) if err != nil { stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker) return false, nil, errMissingTrie @@ -450,7 +460,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, var ( trieMore bool - nodeIt = tr.NodeIterator(origin) + nodeIt = tr.MustNodeIterator(origin) iter = trie.NewIterator(nodeIt) kvkeys, kvvals = result.keys, result.vals @@ -465,7 +475,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, start = time.Now() internal time.Duration ) - nodeIt.AddResolver(snapNodeCache) + nodeIt.AddResolver(resolver) for iter.Next() { if last != nil && bytes.Compare(iter.Key, last) > 0 { trieMore = true @@ -524,7 +534,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, } else { snapAccountTrieReadCounter.Inc((time.Since(start) - internal).Nanoseconds()) } - logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last), + logger.Debug("Regenerated state range", "root", trieID.Root, "last", hexutil.Encode(last), "count", count, "created", created, "updated", updated, "untouched", untouched, "deleted", deleted) // If there are either more trie items, or there are more snap items @@ -611,12 +621,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { return nil } // Retrieve the current account and flatten it into the internal format - var acc struct { - Nonce uint64 - Balance *big.Int - Root common.Hash - CodeHash []byte - } + var acc types.StateAccount if err := rlp.DecodeBytes(val, &acc); err != nil { log.Crit("Invalid account encountered during snapshot creation", "err", err) } @@ -632,7 +637,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { } snapRecoveredAccountMeter.Mark(1) } else { - data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) + data := types.SlimAccountRLP(acc) dataLen = len(data) rawdb.WriteAccountSnapshot(batch, accountHash, data) snapGeneratedAccountMeter.Mark(1) @@ -698,7 +703,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { } var storeOrigin = common.CopyBytes(storeMarker) for { - exhausted, last, err := dl.generateRange(acc.Root, append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, onStorage, nil) + exhausted, last, err := dl.generateRange(trie.StorageTrieID(dl.Root(), accountHash, acc.Root), append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, onStorage, nil) if err != nil { return err } @@ -717,7 +722,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { // Global loop for regerating the entire state trie + all layered storage tries. for { - exhausted, last, err := dl.generateRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountRange, stats, onAccount, FullAccountRLP) + exhausted, last, err := dl.generateRange(trie.StateTrieID(dl.root), rawdb.SnapshotAccountPrefix, "account", accOrigin, accountRange, stats, onAccount, types.FullAccountRLP) // The procedure it aborted, either by external signal or internal error if err != nil { if abort == nil { // aborted by internal error, wait the signal diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 582da6a2e7..caf13e42bb 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -25,48 +25,50 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" - "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" + "github.com/ethereum/go-ethereum/trie/trienode" "golang.org/x/crypto/sha3" ) +func hashData(input []byte) common.Hash { + var hasher = sha3.NewLegacyKeccak256() + var hash common.Hash + hasher.Reset() + hasher.Write(input) + hasher.Sum(hash[:0]) + return hash +} + // Tests that snapshot generation from an empty database. func TestGeneration(t *testing.T) { + testGeneration(t, rawdb.HashScheme) + testGeneration(t, rawdb.PathScheme) +} + +func testGeneration(t *testing.T, scheme string) { // We can't use statedb to make a test trie (circular dependency), so make // a fake one manually. We're going with a small account trie of 3 accounts, // two of which also has the same 3-slot storage trie attached. - var ( - diskdb = memorydb.New() - triedb = trie.NewDatabase(diskdb) - ) - stTrie, _ := trie.NewSecure(common.Hash{}, triedb) - stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0 - stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371 - stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78 - stTrie.Commit(nil) // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 - - accTrie, _ := trie.NewSecure(common.Hash{}, triedb) - acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + var helper = newHelper(scheme) + stRoot := helper.makeStorageTrie(common.Hash{}, common.Hash{}, []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, false) - acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} - val, _ = rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addTrieAccount("acc-2", &types.StateAccount{Balance: big.NewInt(2), Root: emptyRoot, CodeHash: emptyCode.Bytes()}) + helper.addTrieAccount("acc-3", &types.StateAccount{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) - acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ = rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 - root, _, _ := accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd - triedb.Commit(root, false, nil) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + root, snap := helper.CommitAndGenerate() if have, want := root, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"); have != want { t.Fatalf("have %#x want %#x", have, want) } - snap := generateSnapshot(diskdb, triedb, 16, root) select { case <-snap.genPending: // Snapshot generation succeeded @@ -75,63 +77,39 @@ func TestGeneration(t *testing.T) { t.Errorf("Snapshot generation failed") } checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down stop := make(chan *generatorStats) snap.genAbort <- stop <-stop } -func hashData(input []byte) common.Hash { - var hasher = sha3.NewLegacyKeccak256() - var hash common.Hash - hasher.Reset() - hasher.Write(input) - hasher.Sum(hash[:0]) - return hash -} - // Tests that snapshot generation with existent flat state. func TestGenerateExistentState(t *testing.T) { + testGenerateExistentState(t, rawdb.HashScheme) + testGenerateExistentState(t, rawdb.PathScheme) +} + +func testGenerateExistentState(t *testing.T, scheme string) { // We can't use statedb to make a test trie (circular dependency), so make // a fake one manually. We're going with a small account trie of 3 accounts, // two of which also has the same 3-slot storage trie attached. - var ( - diskdb = memorydb.New() - triedb = trie.NewDatabase(diskdb) - ) - stTrie, _ := trie.NewSecure(common.Hash{}, triedb) - stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0 - stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371 - stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78 - stTrie.Commit(nil) // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 - - accTrie, _ := trie.NewSecure(common.Hash{}, triedb) - acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-1")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-1")), []byte("val-1")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-2")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-3")), []byte("val-3")) - - acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} - val, _ = rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 - diskdb.Put(hashData([]byte("acc-2")).Bytes(), val) - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-2")), val) - - acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ = rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-3")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-1")), []byte("val-1")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-2")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-3")), []byte("val-3")) - - root, _, _ := accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd - triedb.Commit(root, false, nil) - - snap := generateSnapshot(diskdb, triedb, 16, root) + var helper = newHelper(scheme) + + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapAccount("acc-1", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + helper.addTrieAccount("acc-2", &types.StateAccount{Balance: big.NewInt(2), Root: emptyRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapAccount("acc-2", &types.StateAccount{Balance: big.NewInt(2), Root: emptyRoot, CodeHash: emptyCode.Bytes()}) + + stRoot = helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addTrieAccount("acc-3", &types.StateAccount{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapAccount("acc-3", &types.StateAccount{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + root, snap := helper.CommitAndGenerate() select { case <-snap.genPending: // Snapshot generation succeeded @@ -140,6 +118,7 @@ func TestGenerateExistentState(t *testing.T) { t.Errorf("Snapshot generation failed") } checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down stop := make(chan *generatorStats) snap.genAbort <- stop @@ -150,18 +129,17 @@ func checkSnapRoot(t *testing.T, snap *diskLayer, trieRoot common.Hash) { t.Helper() accIt := snap.AccountIterator(common.Hash{}) defer accIt.Release() - snapRoot, err := generateTrieRoot(nil, accIt, common.Hash{}, stackTrieGenerate, + snapRoot, err := generateTrieRoot(nil, "", accIt, common.Hash{}, stackTrieGenerate, func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) { storageIt, _ := snap.StorageIterator(accountHash, common.Hash{}) defer storageIt.Release() - hash, err := generateTrieRoot(nil, storageIt, accountHash, stackTrieGenerate, nil, stat, false) + hash, err := generateTrieRoot(nil, "", storageIt, accountHash, stackTrieGenerate, nil, stat, false) if err != nil { return common.Hash{}, err } return hash, nil }, newGenerateStats(), true) - if err != nil { t.Fatal(err) } @@ -171,34 +149,42 @@ func checkSnapRoot(t *testing.T, snap *diskLayer, trieRoot common.Hash) { } type testHelper struct { - diskdb *memorydb.Database + diskdb ethdb.Database triedb *trie.Database accTrie *trie.SecureTrie + nodes *trienode.MergedNodeSet } -func newHelper() *testHelper { - diskdb := memorydb.New() - triedb := trie.NewDatabase(diskdb) - accTrie, _ := trie.NewSecure(common.Hash{}, triedb) +func newHelper(scheme string) *testHelper { + diskdb := rawdb.NewMemoryDatabase() + config := &trie.Config{} + if scheme == rawdb.PathScheme { + config.PathDB = &pathdb.Config{} // disable caching + } else { + config.HashDB = &hashdb.Config{} // disable caching + } + triedb := trie.NewDatabase(diskdb, config) + accTrie, _ := trie.NewSecure(trie.StateTrieID(common.Hash{}), triedb) return &testHelper{ diskdb: diskdb, triedb: triedb, accTrie: accTrie, + nodes: trienode.NewMergedNodeSet(), } } -func (t *testHelper) addTrieAccount(acckey string, acc *Account) { +func (t *testHelper) addTrieAccount(acckey string, acc *types.StateAccount) { val, _ := rlp.EncodeToBytes(acc) t.accTrie.Update([]byte(acckey), val) } -func (t *testHelper) addSnapAccount(acckey string, acc *Account) { +func (t *testHelper) addSnapAccount(acckey string, acc *types.StateAccount) { val, _ := rlp.EncodeToBytes(acc) key := hashData([]byte(acckey)) rawdb.WriteAccountSnapshot(t.diskdb, key, val) } -func (t *testHelper) addAccount(acckey string, acc *Account) { +func (t *testHelper) addAccount(acckey string, acc *types.StateAccount) { t.addTrieAccount(acckey, acc) t.addSnapAccount(acckey, acc) } @@ -210,18 +196,33 @@ func (t *testHelper) addSnapStorage(accKey string, keys []string, vals []string) } } -func (t *testHelper) makeStorageTrie(keys []string, vals []string) []byte { - stTrie, _ := trie.NewSecure(common.Hash{}, t.triedb) +func (t *testHelper) makeStorageTrie(stateRoot, owner common.Hash, keys []string, vals []string, commit bool) common.Hash { + stTrie, _ := trie.NewSecure(trie.StorageTrieID(stateRoot, owner, common.Hash{}), t.triedb) for i, k := range keys { stTrie.Update([]byte(k), []byte(vals[i])) } - root, _, _ := stTrie.Commit(nil) - return root.Bytes() + if !commit { + return stTrie.Hash() + } + root, nodes, _ := stTrie.Commit(false) + if nodes != nil { + t.nodes.Merge(nodes) + } + return root +} + +func (t *testHelper) Commit() common.Hash { + root, nodes, _ := t.accTrie.Commit(true) + if nodes != nil { + t.nodes.Merge(nodes) + } + t.triedb.Update(root, types.EmptyRootHash, 0, t.nodes, nil) + t.triedb.Commit(root, false) + return root } -func (t *testHelper) Generate() (common.Hash, *diskLayer) { - root, _, _ := t.accTrie.Commit(nil) - t.triedb.Commit(root, false, nil) +func (t *testHelper) CommitAndGenerate() (common.Hash, *diskLayer) { + root := t.Commit() snap := generateSnapshot(t.diskdb, t.triedb, 16, root) return root, snap } @@ -234,75 +235,92 @@ func (t *testHelper) Generate() (common.Hash, *diskLayer) { // - miss in the beginning // - miss in the middle // - miss in the end +// // - the contract(non-empty storage) has wrong storage slots // - wrong slots in the beginning // - wrong slots in the middle // - wrong slots in the end +// // - the contract(non-empty storage) has extra storage slots // - extra slots in the beginning // - extra slots in the middle // - extra slots in the end func TestGenerateExistentStateWithWrongStorage(t *testing.T) { - helper := newHelper() - stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + testGenerateExistentStateWithWrongStorage(t, rawdb.HashScheme) + testGenerateExistentStateWithWrongStorage(t, rawdb.PathScheme) +} + +func testGenerateExistentStateWithWrongStorage(t *testing.T, scheme string) { + helper := newHelper(scheme) // Account one, empty root but non-empty database - helper.addAccount("acc-1", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) + helper.addAccount("acc-1", &types.StateAccount{Balance: big.NewInt(1), Root: emptyRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) // Account two, non empty root but empty database - helper.addAccount("acc-2", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-2")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-2", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // Miss slots { // Account three, non empty root but misses slots in the beginning - helper.addAccount("acc-3", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-3", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-3", []string{"key-2", "key-3"}, []string{"val-2", "val-3"}) // Account four, non empty root but misses slots in the middle - helper.addAccount("acc-4", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-4")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-4", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-4", []string{"key-1", "key-3"}, []string{"val-1", "val-3"}) // Account five, non empty root but misses slots in the end - helper.addAccount("acc-5", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-5")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-5", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-5", []string{"key-1", "key-2"}, []string{"val-1", "val-2"}) } // Wrong storage slots { // Account six, non empty root but wrong slots in the beginning - helper.addAccount("acc-6", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-6")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-6", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"badval-1", "val-2", "val-3"}) // Account seven, non empty root but wrong slots in the middle - helper.addAccount("acc-7", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-7")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-7", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-7", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "badval-2", "val-3"}) // Account eight, non empty root but wrong slots in the end - helper.addAccount("acc-8", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-8")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-8", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-8", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "badval-3"}) // Account 9, non empty root but rotated slots - helper.addAccount("acc-9", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-9")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-9", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-9", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-3", "val-2"}) } // Extra storage slots { // Account 10, non empty root but extra slots in the beginning - helper.addAccount("acc-10", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-10")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-10", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-10", []string{"key-0", "key-1", "key-2", "key-3"}, []string{"val-0", "val-1", "val-2", "val-3"}) // Account 11, non empty root but extra slots in the middle - helper.addAccount("acc-11", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-11")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-11", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-11", []string{"key-1", "key-2", "key-2-1", "key-3"}, []string{"val-1", "val-2", "val-2-1", "val-3"}) // Account 12, non empty root but extra slots in the end - helper.addAccount("acc-12", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-12")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-12", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-12", []string{"key-1", "key-2", "key-3", "key-4"}, []string{"val-1", "val-2", "val-3", "val-4"}) } - root, snap := helper.Generate() + root, snap := helper.CommitAndGenerate() t.Logf("Root: %#x\n", root) // Root = 0x8746cce9fd9c658b2cfd639878ed6584b7a2b3e73bb40f607fcfa156002429a0 select { @@ -325,36 +343,47 @@ func TestGenerateExistentStateWithWrongStorage(t *testing.T) { // - wrong accounts // - extra accounts func TestGenerateExistentStateWithWrongAccounts(t *testing.T) { - helper := newHelper() - stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + testGenerateExistentStateWithWrongAccounts(t, rawdb.HashScheme) + testGenerateExistentStateWithWrongAccounts(t, rawdb.PathScheme) +} + +func testGenerateExistentStateWithWrongAccounts(t *testing.T, scheme string) { + helper := newHelper(scheme) + + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-2")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-4")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-6")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // Trie accounts [acc-1, acc-2, acc-3, acc-4, acc-6] // Extra accounts [acc-0, acc-5, acc-7] // Missing accounts, only in the trie { - helper.addTrieAccount("acc-1", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // Beginning - helper.addTrieAccount("acc-4", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // Middle - helper.addTrieAccount("acc-6", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // End + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // Beginning + helper.addTrieAccount("acc-4", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // Middle + helper.addTrieAccount("acc-6", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // End } // Wrong accounts { - helper.addTrieAccount("acc-2", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) - helper.addSnapAccount("acc-2", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: common.Hex2Bytes("0x1234")}) + helper.addTrieAccount("acc-2", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapAccount("acc-2", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: common.Hex2Bytes("0x1234")}) - helper.addTrieAccount("acc-3", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) - helper.addSnapAccount("acc-3", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) + helper.addTrieAccount("acc-3", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapAccount("acc-3", &types.StateAccount{Balance: big.NewInt(1), Root: emptyRoot, CodeHash: emptyCode.Bytes()}) } // Extra accounts, only in the snap { - helper.addSnapAccount("acc-0", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyRoot.Bytes()}) // before the beginning - helper.addSnapAccount("acc-5", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: common.Hex2Bytes("0x1234")}) // Middle - helper.addSnapAccount("acc-7", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyRoot.Bytes()}) // after the end + helper.addSnapAccount("acc-0", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyRoot.Bytes()}) // before the beginning + helper.addSnapAccount("acc-5", &types.StateAccount{Balance: big.NewInt(1), Root: emptyRoot, CodeHash: common.Hex2Bytes("0x1234")}) // Middle + helper.addSnapAccount("acc-7", &types.StateAccount{Balance: big.NewInt(1), Root: emptyRoot, CodeHash: emptyRoot.Bytes()}) // after the end } - root, snap := helper.Generate() + root, snap := helper.CommitAndGenerate() t.Logf("Root: %#x\n", root) // Root = 0x825891472281463511e7ebcc7f109e4f9200c20fa384754e11fd605cd98464e8 select { @@ -375,32 +404,30 @@ func TestGenerateExistentStateWithWrongAccounts(t *testing.T) { // Tests that snapshot generation errors out correctly in case of a missing trie // node in the account trie. func TestGenerateCorruptAccountTrie(t *testing.T) { + + testGenerateCorruptAccountTrie(t, rawdb.HashScheme) + testGenerateCorruptAccountTrie(t, rawdb.PathScheme) +} + +func testGenerateCorruptAccountTrie(t *testing.T, scheme string) { // We can't use statedb to make a test trie (circular dependency), so make // a fake one manually. We're going with a small account trie of 3 accounts, // without any storage slots to keep the test smaller. - var ( - diskdb = memorydb.New() - triedb = trie.NewDatabase(diskdb) - ) - tr, _ := trie.NewSecure(common.Hash{}, triedb) - acc := &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} - val, _ := rlp.EncodeToBytes(acc) - tr.Update([]byte("acc-1"), val) // 0xc7a30f39aff471c95d8a837497ad0e49b65be475cc0953540f80cfcdbdcd9074 + helper := newHelper(scheme) + + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: big.NewInt(1), Root: emptyRoot, CodeHash: emptyCode.Bytes()}) // 0xc7a30f39aff471c95d8a837497ad0e49b65be475cc0953540f80cfcdbdcd9074 + helper.addTrieAccount("acc-2", &types.StateAccount{Balance: big.NewInt(2), Root: emptyRoot, CodeHash: emptyCode.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + helper.addTrieAccount("acc-3", &types.StateAccount{Balance: big.NewInt(3), Root: emptyRoot, CodeHash: emptyCode.Bytes()}) // 0x19ead688e907b0fab07176120dceec244a72aff2f0aa51e8b827584e378772f4 - acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} - val, _ = rlp.EncodeToBytes(acc) - tr.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + root := helper.Commit() // Root: 0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978 - acc = &Account{Balance: big.NewInt(3), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} - val, _ = rlp.EncodeToBytes(acc) - tr.Update([]byte("acc-3"), val) // 0x19ead688e907b0fab07176120dceec244a72aff2f0aa51e8b827584e378772f4 - tr.Commit(nil) // Root: 0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978 + // Delete an account trie node and ensure the generator chokes + targetPath := []byte{0xc} + targetHash := common.HexToHash("0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7") - // Delete an account trie leaf and ensure the generator chokes - triedb.Commit(common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978"), false, nil) - diskdb.Delete(common.HexToHash("0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7").Bytes()) + rawdb.DeleteTrieNode(helper.diskdb, common.Hash{}, targetPath, targetHash, scheme) - snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978")) + snap := generateSnapshot(helper.diskdb, helper.triedb, 16, root) select { case <-snap.genPending: // Snapshot generation succeeded @@ -419,48 +446,32 @@ func TestGenerateCorruptAccountTrie(t *testing.T) { // trie node for a storage trie. It's similar to internal corruption but it is // handled differently inside the generator. func TestGenerateMissingStorageTrie(t *testing.T) { + testGenerateMissingStorageTrie(t, rawdb.HashScheme) + testGenerateMissingStorageTrie(t, rawdb.PathScheme) +} + +func testGenerateMissingStorageTrie(t *testing.T, scheme string) { // We can't use statedb to make a test trie (circular dependency), so make // a fake one manually. We're going with a small account trie of 3 accounts, // two of which also has the same 3-slot storage trie attached. var ( - diskdb = memorydb.New() - triedb = trie.NewDatabase(diskdb) + acc1 = hashData([]byte("acc-1")) + acc3 = hashData([]byte("acc-3")) + helper = newHelper(scheme) ) - stTrie, _ := trie.NewSecure(common.Hash{}, triedb) - stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0 - stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371 - stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78 - stTrie.Commit(nil) // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 - - accTrie, _ := trie.NewSecure(common.Hash{}, triedb) - acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e - acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} - val, _ = rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + helper.addTrieAccount("acc-2", &types.StateAccount{Balance: big.NewInt(2), Root: emptyRoot, CodeHash: emptyCode.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + stRoot = helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addTrieAccount("acc-3", &types.StateAccount{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + root := helper.Commit() - acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ = rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 - accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd + // Delete storage trie root of account one and three. + rawdb.DeleteTrieNode(helper.diskdb, acc1, nil, stRoot, scheme) + rawdb.DeleteTrieNode(helper.diskdb, acc3, nil, stRoot, scheme) - // We can only corrupt the disk database, so flush the tries out - triedb.Reference( - common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), - common.HexToHash("0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e"), - ) - triedb.Reference( - common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), - common.HexToHash("0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2"), - ) - triedb.Commit(common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), false, nil) - - // Delete a storage trie root and ensure the generator chokes - diskdb.Delete(common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67").Bytes()) - - snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd")) + snap := generateSnapshot(helper.diskdb, helper.triedb, 16, root) select { case <-snap.genPending: // Snapshot generation succeeded @@ -478,48 +489,31 @@ func TestGenerateMissingStorageTrie(t *testing.T) { // Tests that snapshot generation errors out correctly in case of a missing trie // node in a storage trie. func TestGenerateCorruptStorageTrie(t *testing.T) { + + testGenerateCorruptStorageTrie(t, rawdb.HashScheme) + testGenerateCorruptStorageTrie(t, rawdb.PathScheme) +} + +func testGenerateCorruptStorageTrie(t *testing.T, scheme string) { // We can't use statedb to make a test trie (circular dependency), so make // a fake one manually. We're going with a small account trie of 3 accounts, // two of which also has the same 3-slot storage trie attached. - var ( - diskdb = memorydb.New() - triedb = trie.NewDatabase(diskdb) - ) - stTrie, _ := trie.NewSecure(common.Hash{}, triedb) - stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0 - stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371 - stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78 - stTrie.Commit(nil) // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 - - accTrie, _ := trie.NewSecure(common.Hash{}, triedb) - acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e - - acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} - val, _ = rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 - - acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ = rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 - accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd - - // We can only corrupt the disk database, so flush the tries out - triedb.Reference( - common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), - common.HexToHash("0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e"), - ) - triedb.Reference( - common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), - common.HexToHash("0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2"), - ) - triedb.Commit(common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), false, nil) - - // Delete a storage trie leaf and ensure the generator chokes - diskdb.Delete(common.HexToHash("0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371").Bytes()) - - snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd")) + helper := newHelper(scheme) + + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + helper.addTrieAccount("acc-2", &types.StateAccount{Balance: big.NewInt(2), Root: emptyRoot, CodeHash: emptyCode.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + stRoot = helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addTrieAccount("acc-3", &types.StateAccount{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + + root := helper.Commit() + // Delete a node in the storage trie. + targetPath := []byte{0x4} + targetHash := common.HexToHash("0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371") + rawdb.DeleteTrieNode(helper.diskdb, hashData([]byte("acc-1")), targetPath, targetHash, scheme) + rawdb.DeleteTrieNode(helper.diskdb, hashData([]byte("acc-3")), targetPath, targetHash, scheme) + + snap := generateSnapshot(helper.diskdb, helper.triedb, 16, root) select { case <-snap.genPending: // Snapshot generation succeeded @@ -534,56 +528,56 @@ func TestGenerateCorruptStorageTrie(t *testing.T) { <-stop } -func getStorageTrie(n int, triedb *trie.Database) *trie.SecureTrie { - stTrie, _ := trie.NewSecure(common.Hash{}, triedb) - for i := 0; i < n; i++ { - k := fmt.Sprintf("key-%d", i) - v := fmt.Sprintf("val-%d", i) - stTrie.Update([]byte(k), []byte(v)) - } - stTrie.Commit(nil) - return stTrie -} - // Tests that snapshot generation when an extra account with storage exists in the snap state. func TestGenerateWithExtraAccounts(t *testing.T) { - var ( - diskdb = memorydb.New() - triedb = trie.NewDatabase(diskdb) - stTrie = getStorageTrie(5, triedb) - ) - accTrie, _ := trie.NewSecure(common.Hash{}, triedb) - { // Account one in the trie - acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + testGenerateWithExtraAccounts(t, rawdb.HashScheme) + testGenerateWithExtraAccounts(t, rawdb.PathScheme) +} + +func testGenerateWithExtraAccounts(t *testing.T, scheme string) { + helper := newHelper(scheme) + { + // Account one in the trie + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-1")), + []string{"key-1", "key-2", "key-3", "key-4", "key-5"}, + []string{"val-1", "val-2", "val-3", "val-4", "val-5"}, + true, + ) + acc := &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + helper.accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + // Identical in the snap key := hashData([]byte("acc-1")) - rawdb.WriteAccountSnapshot(diskdb, key, val) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-1")), []byte("val-1")) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-2")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-3")), []byte("val-3")) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-4")), []byte("val-4")) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-5")), []byte("val-5")) - } - { // Account two exists only in the snapshot - acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + rawdb.WriteAccountSnapshot(helper.triedb.DiskDB(), key, val) + rawdb.WriteStorageSnapshot(helper.triedb.DiskDB(), key, hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(helper.triedb.DiskDB(), key, hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(helper.triedb.DiskDB(), key, hashData([]byte("key-3")), []byte("val-3")) + rawdb.WriteStorageSnapshot(helper.triedb.DiskDB(), key, hashData([]byte("key-4")), []byte("val-4")) + rawdb.WriteStorageSnapshot(helper.triedb.DiskDB(), key, hashData([]byte("key-5")), []byte("val-5")) + } + { + // Account two exists only in the snapshot + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-2")), + []string{"key-1", "key-2", "key-3", "key-4", "key-5"}, + []string{"val-1", "val-2", "val-3", "val-4", "val-5"}, + true, + ) + acc := &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) key := hashData([]byte("acc-2")) - rawdb.WriteAccountSnapshot(diskdb, key, val) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("b-key-1")), []byte("b-val-1")) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("b-key-2")), []byte("b-val-2")) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("b-key-3")), []byte("b-val-3")) - } - root, _, _ := accTrie.Commit(nil) - t.Logf("root: %x", root) - triedb.Commit(root, false, nil) + rawdb.WriteAccountSnapshot(helper.triedb.DiskDB(), key, val) + rawdb.WriteStorageSnapshot(helper.triedb.DiskDB(), key, hashData([]byte("b-key-1")), []byte("b-val-1")) + rawdb.WriteStorageSnapshot(helper.triedb.DiskDB(), key, hashData([]byte("b-key-2")), []byte("b-val-2")) + rawdb.WriteStorageSnapshot(helper.triedb.DiskDB(), key, hashData([]byte("b-key-3")), []byte("b-val-3")) + } + root := helper.Commit() + // To verify the test: If we now inspect the snap db, there should exist extraneous storage items - if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data == nil { + if data := rawdb.ReadStorageSnapshot(helper.triedb.DiskDB(), hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data == nil { t.Fatalf("expected snap storage to exist") } - - snap := generateSnapshot(diskdb, triedb, 16, root) + snap := generateSnapshot(helper.diskdb, helper.triedb, 16, root) select { case <-snap.genPending: // Snapshot generation succeeded @@ -592,12 +586,13 @@ func TestGenerateWithExtraAccounts(t *testing.T) { t.Errorf("Snapshot generation failed") } checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down stop := make(chan *generatorStats) snap.genAbort <- stop <-stop // If we now inspect the snap db, there should exist no extraneous storage items - if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil { + if data := rawdb.ReadStorageSnapshot(helper.triedb.DiskDB(), hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil { t.Fatalf("expected slot to be removed, got %v", string(data)) } } @@ -608,40 +603,44 @@ func enableLogging() { // Tests that snapshot generation when an extra account with storage exists in the snap state. func TestGenerateWithManyExtraAccounts(t *testing.T) { + testGenerateWithManyExtraAccounts(t, rawdb.HashScheme) + testGenerateWithManyExtraAccounts(t, rawdb.PathScheme) +} + +func testGenerateWithManyExtraAccounts(t *testing.T, scheme string) { if false { enableLogging() } - var ( - diskdb = memorydb.New() - triedb = trie.NewDatabase(diskdb) - stTrie = getStorageTrie(3, triedb) - ) - accTrie, _ := trie.NewSecure(common.Hash{}, triedb) - { // Account one in the trie - acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + helper := newHelper(scheme) + { + // Account one in the trie + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-1")), + []string{"key-1", "key-2", "key-3"}, + []string{"val-1", "val-2", "val-3"}, + true, + ) + acc := &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + helper.accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + // Identical in the snap key := hashData([]byte("acc-1")) - rawdb.WriteAccountSnapshot(diskdb, key, val) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-1")), []byte("val-1")) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-2")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-3")), []byte("val-3")) + rawdb.WriteAccountSnapshot(helper.diskdb, key, val) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-3")), []byte("val-3")) } - { // 100 accounts exist only in snapshot + { + // 100 accounts exist only in snapshot for i := 0; i < 1000; i++ { - //acc := &Account{Balance: big.NewInt(int64(i)), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - acc := &Account{Balance: big.NewInt(int64(i)), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + //acc := &types.StateAccount{Balance: big.NewInt(int64(i)), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + acc := &types.StateAccount{Balance: big.NewInt(int64(i)), Root: emptyRoot, CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) key := hashData([]byte(fmt.Sprintf("acc-%d", i))) - rawdb.WriteAccountSnapshot(diskdb, key, val) + rawdb.WriteAccountSnapshot(helper.diskdb, key, val) } } - root, _, _ := accTrie.Commit(nil) - t.Logf("root: %x", root) - triedb.Commit(root, false, nil) - - snap := generateSnapshot(diskdb, triedb, 16, root) + root, snap := helper.CommitAndGenerate() select { case <-snap.genPending: // Snapshot generation succeeded @@ -666,35 +665,31 @@ func TestGenerateWithManyExtraAccounts(t *testing.T) { // So in trie, we iterate 2 entries 0x03, 0x07. We create the 0x07 in the database and abort the procedure, because the trie is exhausted. // But in the database, we still have the stale storage slots 0x04, 0x05. They are not iterated yet, but the procedure is finished. func TestGenerateWithExtraBeforeAndAfter(t *testing.T) { + testGenerateWithExtraBeforeAndAfter(t, rawdb.HashScheme) + testGenerateWithExtraBeforeAndAfter(t, rawdb.PathScheme) +} + +func testGenerateWithExtraBeforeAndAfter(t *testing.T, scheme string) { accountCheckRange = 3 if false { enableLogging() } - var ( - diskdb = memorydb.New() - triedb = trie.NewDatabase(diskdb) - ) - accTrie, _ := trie.New(common.Hash{}, triedb) + helper := newHelper(scheme) { - acc := &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + acc := &types.StateAccount{Balance: big.NewInt(1), Root: emptyRoot, CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) - accTrie.Update(common.HexToHash("0x03").Bytes(), val) - accTrie.Update(common.HexToHash("0x07").Bytes(), val) - - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x01"), val) - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x02"), val) - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x03"), val) - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x04"), val) - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x05"), val) - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x06"), val) - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x07"), val) - } - - root, _, _ := accTrie.Commit(nil) - t.Logf("root: %x", root) - triedb.Commit(root, false, nil) - - snap := generateSnapshot(diskdb, triedb, 16, root) + helper.accTrie.Update(common.HexToHash("0x03").Bytes(), val) + helper.accTrie.Update(common.HexToHash("0x07").Bytes(), val) + + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x01"), val) + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x02"), val) + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x03"), val) + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x04"), val) + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x05"), val) + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x06"), val) + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x07"), val) + } + root, snap := helper.CommitAndGenerate() select { case <-snap.genPending: // Snapshot generation succeeded @@ -712,33 +707,29 @@ func TestGenerateWithExtraBeforeAndAfter(t *testing.T) { // TestGenerateWithMalformedSnapdata tests what happes if we have some junk // in the snapshot database, which cannot be parsed back to an account func TestGenerateWithMalformedSnapdata(t *testing.T) { + testGenerateWithMalformedSnapdata(t, rawdb.HashScheme) + testGenerateWithMalformedSnapdata(t, rawdb.PathScheme) +} + +func testGenerateWithMalformedSnapdata(t *testing.T, scheme string) { accountCheckRange = 3 if false { enableLogging() } - var ( - diskdb = memorydb.New() - triedb = trie.NewDatabase(diskdb) - ) - accTrie, _ := trie.New(common.Hash{}, triedb) + helper := newHelper(scheme) { - acc := &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + acc := &types.StateAccount{Balance: big.NewInt(1), Root: emptyRoot, CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) - accTrie.Update(common.HexToHash("0x03").Bytes(), val) + helper.accTrie.Update(common.HexToHash("0x03").Bytes(), val) junk := make([]byte, 100) copy(junk, []byte{0xde, 0xad}) - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x02"), junk) - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x03"), junk) - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x04"), junk) - rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x05"), junk) + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x02"), junk) + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x03"), junk) + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x04"), junk) + rawdb.WriteAccountSnapshot(helper.diskdb, common.HexToHash("0x05"), junk) } - - root, _, _ := accTrie.Commit(nil) - t.Logf("root: %x", root) - triedb.Commit(root, false, nil) - - snap := generateSnapshot(diskdb, triedb, 16, root) + root, snap := helper.CommitAndGenerate() select { case <-snap.genPending: // Snapshot generation succeeded @@ -752,23 +743,28 @@ func TestGenerateWithMalformedSnapdata(t *testing.T) { snap.genAbort <- stop <-stop // If we now inspect the snap db, there should exist no extraneous storage items - if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil { + if data := rawdb.ReadStorageSnapshot(helper.diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil { t.Fatalf("expected slot to be removed, got %v", string(data)) } } func TestGenerateFromEmptySnap(t *testing.T) { + testGenerateFromEmptySnap(t, rawdb.HashScheme) + testGenerateFromEmptySnap(t, rawdb.PathScheme) +} + +func testGenerateFromEmptySnap(t *testing.T, scheme string) { //enableLogging() accountCheckRange = 10 storageCheckRange = 20 - helper := newHelper() - stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + helper := newHelper(scheme) // Add 1K accounts to the trie for i := 0; i < 400; i++ { + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte(fmt.Sprintf("acc-%d", i))), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount(fmt.Sprintf("acc-%d", i), - &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + &types.StateAccount{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) } - root, snap := helper.Generate() + root, snap := helper.CommitAndGenerate() t.Logf("Root: %#x\n", root) // Root: 0x6f7af6d2e1a1bf2b84a3beb3f8b64388465fbc1e274ca5d5d3fc787ca78f59e4 select { @@ -793,17 +789,22 @@ func TestGenerateFromEmptySnap(t *testing.T) { // This hits a case where the snap verification passes, but there are more elements in the trie // which we must also add. func TestGenerateWithIncompleteStorage(t *testing.T) { + testGenerateWithIncompleteStorage(t, rawdb.HashScheme) + testGenerateWithIncompleteStorage(t, rawdb.PathScheme) +} + +func testGenerateWithIncompleteStorage(t *testing.T, scheme string) { storageCheckRange = 4 - helper := newHelper() + helper := newHelper(scheme) stKeys := []string{"1", "2", "3", "4", "5", "6", "7", "8"} stVals := []string{"v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8"} - stRoot := helper.makeStorageTrie(stKeys, stVals) // We add 8 accounts, each one is missing exactly one of the storage slots. This means // we don't have to order the keys and figure out exactly which hash-key winds up // on the sensitive spots at the boundaries for i := 0; i < 8; i++ { accKey := fmt.Sprintf("acc-%d", i) - helper.addAccount(accKey, &Account{Balance: big.NewInt(int64(i)), Root: stRoot, CodeHash: emptyCode.Bytes()}) + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte(accKey)), stKeys, stVals, true) + helper.addAccount(accKey, &types.StateAccount{Balance: big.NewInt(int64(i)), Root: stRoot, CodeHash: emptyCode.Bytes()}) var moddedKeys []string var moddedVals []string for ii := 0; ii < 8; ii++ { @@ -814,8 +815,7 @@ func TestGenerateWithIncompleteStorage(t *testing.T) { } helper.addSnapStorage(accKey, moddedKeys, moddedVals) } - - root, snap := helper.Generate() + root, snap := helper.CommitAndGenerate() t.Logf("Root: %#x\n", root) // Root: 0xca73f6f05ba4ca3024ef340ef3dfca8fdabc1b677ff13f5a9571fd49c16e67ff select { diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 6ee6b06bb5..267a2ffd83 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -26,6 +26,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" @@ -103,7 +104,7 @@ type Snapshot interface { // Account directly retrieves the account associated with a particular hash in // the snapshot slim data format. - Account(hash common.Hash) (*Account, error) + Account(hash common.Hash) (*types.SlimAccount, error) // AccountRLP directly retrieves the account RLP associated with a particular // hash in the snapshot slim data format. @@ -179,10 +180,10 @@ type Tree struct { // If the memory layers in the journal do not match the disk layer (e.g. there is // a gap) or the journal is missing, there are two repair cases: // -// - if the 'recovery' parameter is true, all memory diff-layers will be discarded. -// This case happens when the snapshot is 'ahead' of the state trie. -// - otherwise, the entire snapshot is considered invalid and will be recreated on -// a background thread. +// - if the 'recovery' parameter is true, all memory diff-layers will be discarded. +// This case happens when the snapshot is 'ahead' of the state trie. +// - otherwise, the entire snapshot is considered invalid and will be recreated on +// a background thread. func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, rebuild bool, recovery bool) (*Tree, error) { // Create a new, empty snapshot tree snap := &Tree{ @@ -767,14 +768,14 @@ func (t *Tree) Verify(root common.Hash) error { } defer acctIt.Release() - got, err := generateTrieRoot(nil, acctIt, common.Hash{}, stackTrieGenerate, func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) { + got, err := generateTrieRoot(nil, "", acctIt, common.Hash{}, stackTrieGenerate, func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) { storageIt, err := t.StorageIterator(root, accountHash, common.Hash{}) if err != nil { return common.Hash{}, err } defer storageIt.Release() - hash, err := generateTrieRoot(nil, storageIt, accountHash, stackTrieGenerate, nil, stat, false) + hash, err := generateTrieRoot(nil, "", storageIt, accountHash, stackTrieGenerate, nil, stat, false) if err != nil { return common.Hash{}, err } diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go index 87c46629d9..0daf4dfd78 100644 --- a/core/state/snapshot/snapshot_test.go +++ b/core/state/snapshot/snapshot_test.go @@ -27,6 +27,7 @@ import ( "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/rlp" ) @@ -42,10 +43,10 @@ func randomHash() common.Hash { // randomAccount generates a random account and returns it RLP encoded. func randomAccount() []byte { root := randomHash() - a := Account{ + a := &types.StateAccount{ Balance: big.NewInt(rand.Int63()), Nonce: rand.Uint64(), - Root: root[:], + Root: root, CodeHash: emptyCode[:], } data, _ := rlp.EncodeToBytes(a) @@ -463,7 +464,7 @@ func TestReadStateDuringFlattening(t *testing.T) { snap := snaps.Snapshot(common.HexToHash("0xa3")) // Register the testing hook to access the state after flattening - var result = make(chan *Account) + var result = make(chan *types.SlimAccount) snaps.onFlatten = func() { // Spin up a thread to read the account from the pre-created // snapshot handler. It's expected to be blocked. diff --git a/core/state/state_object.go b/core/state/state_object.go index 22d90b8420..be5452b4aa 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -28,6 +28,7 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" ) var emptyCodeHash = crypto.Keccak256(nil) @@ -61,13 +62,14 @@ func (s Storage) Copy() Storage { // // The usage pattern is as follows: // First you need to obtain a state object. -// Account values can be accessed and modified through the object. -// Finally, call CommitTrie to write the modified storage trie into a database. +// Account values as well as storages can be accessed and modified through the object. +// Finally, call commit to return the changes of storage trie and update account data. type stateObject struct { - address common.Address - addrHash common.Hash // hash of ethereum address of the account - data types.StateAccount + address common.Address // address of the account + addrHash common.Hash // hash of ethereum address of the account + data types.StateAccount // Account data with all mutations applied in the scope of block db *StateDB + origin *types.StateAccount // Account original data without any change applied, nil means it was not existent before // DB error. // State objects are used by the consensus core and VM which are @@ -80,17 +82,22 @@ type stateObject struct { trie Trie // storage trie, which becomes non-nil on first access code Code // contract bytecode, which gets set when code is loaded - originStorage Storage // Storage cache of original entries to dedup rewrites, reset for every transaction + originStorage Storage // Storage cache of original entries to dedup rewrites pendingStorage Storage // Storage entries that need to be flushed to disk, at the end of an entire block - dirtyStorage Storage // Storage entries that have been modified in the current transaction execution + dirtyStorage Storage // Storage entries that have been modified in the current transaction execution, reset for every transaction fakeStorage Storage // Fake storage which constructed by caller for debugging purpose. // Cache flags. - // When an object is marked self-destructed it will be delete from the trie - // during the "update" phase of the state transition. - dirtyCode bool // true if the code was updated + dirtyCode bool // true if the code was updated + + // Flag whether the account was marked as selfDestructed. The selfDestructed account + // is still accessible in the scope of same transaction. selfDestructed bool - deleted bool + + // Flag whether the account was marked as deleted. The selfDestructed account + // or the account is considered as empty will be marked as deleted at + // the end of transaction and no longer accessible anymore. + deleted bool // Flag whether the object was created in the current transaction created bool @@ -102,21 +109,19 @@ func (s *stateObject) empty() bool { } // newObject creates a state object. -func newObject(db *StateDB, address common.Address, data types.StateAccount) *stateObject { - if data.Balance == nil { - data.Balance = new(big.Int) - } - if data.CodeHash == nil { - data.CodeHash = emptyCodeHash - } - if data.Root == (common.Hash{}) { - data.Root = emptyRoot +func newObject(db *StateDB, address common.Address, acct *types.StateAccount) *stateObject { + // origin is supposed to not be changed directly but only be reassigned with the current state when committing + // so it's safe to use pointer here. + origin := acct + if acct == nil { + acct = types.NewEmptyStateAccount() } return &stateObject{ db: db, address: address, addrHash: crypto.Keccak256Hash(address[:]), - data: data, + origin: origin, + data: *acct, originStorage: make(Storage), pendingStorage: make(Storage), dirtyStorage: make(Storage), @@ -150,20 +155,20 @@ func (s *stateObject) touch() { } } -func (s *stateObject) getTrie(db Database) Trie { +func (s *stateObject) getTrie() Trie { if s.trie == nil { // Try fetching from prefetcher first // We don't prefetch empty tries if s.data.Root != emptyRoot && s.db.prefetcher != nil { // When the miner is creating the pending state, there is no // prefetcher - s.trie = s.db.prefetcher.trie(s.data.Root) + s.trie = s.db.prefetcher.trie(s.addrHash, s.data.Root) } if s.trie == nil { var err error - s.trie, err = db.OpenStorageTrie(s.addrHash, s.data.Root) + s.trie, err = s.db.db.OpenStorageTrie(s.db.originalRoot, s.addrHash, s.data.Root) if err != nil { - s.trie, _ = db.OpenStorageTrie(s.addrHash, common.Hash{}) + s.trie, _ = s.db.db.OpenStorageTrie(s.db.originalRoot, s.addrHash, common.Hash{}) s.setError(fmt.Errorf("can't create storage trie: %v", err)) } } @@ -172,7 +177,7 @@ func (s *stateObject) getTrie(db Database) Trie { } // GetState retrieves a value from the account storage trie. -func (s *stateObject) GetState(db Database, key common.Hash) common.Hash { +func (s *stateObject) GetState(key common.Hash) common.Hash { // If the fake storage is set, only lookup the state here(in the debugging mode) if s.fakeStorage != nil { return s.fakeStorage[key] @@ -183,11 +188,11 @@ func (s *stateObject) GetState(db Database, key common.Hash) common.Hash { return value } // Otherwise return the entry's original value - return s.GetCommittedState(db, key) + return s.GetCommittedState(key) } // GetCommittedState retrieves a value from the committed account storage trie. -func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Hash { +func (s *stateObject) GetCommittedState(key common.Hash) common.Hash { // If the fake storage is set, only lookup the state here(in the debugging mode) if s.fakeStorage != nil { return s.fakeStorage[key] @@ -226,7 +231,7 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has // 1) resurrect happened, and new slot values were set -- those should // have been handles via pendingStorage above. // 2) we don't have new values, and can deliver empty response back - if _, destructed := s.db.snapDestructs[s.addrHash]; destructed { + if _, destructed := s.db.stateObjectsDestruct[s.address]; destructed { return common.Hash{} } enc, err = s.db.snap.Storage(s.addrHash, crypto.Keccak256Hash(key.Bytes())) @@ -242,7 +247,7 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has if metrics.EnabledExpensive { meter = &s.db.StorageReads } - if enc, err = s.getTrie(db).TryGet(key.Bytes()); err != nil { + if enc, err = s.getTrie().TryGet(key.Bytes()); err != nil { s.setError(err) return common.Hash{} } @@ -260,14 +265,14 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has } // SetState updates a value in account storage. -func (s *stateObject) SetState(db Database, key, value common.Hash) { +func (s *stateObject) SetState(key, value common.Hash) { // If the fake storage is set, put the temporary state update here. if s.fakeStorage != nil { s.fakeStorage[key] = value return } // If the new value is the same as old, don't set - prev := s.GetState(db, key) + prev := s.GetState(key) if prev == value { return } @@ -313,7 +318,7 @@ func (s *stateObject) finalise(prefetch bool) { } } if s.db.prefetcher != nil && prefetch && len(slotsToPrefetch) > 0 && s.data.Root != emptyRoot { - s.db.prefetcher.prefetch(s.data.Root, slotsToPrefetch) + s.db.prefetcher.prefetch(s.addrHash, s.data.Root, slotsToPrefetch) } if len(s.dirtyStorage) > 0 { s.dirtyStorage = make(Storage) @@ -322,7 +327,7 @@ func (s *stateObject) finalise(prefetch bool) { // updateTrie writes cached storage modifications into the object's storage trie. // It will return nil if the trie has not been loaded and no changes have been made -func (s *stateObject) updateTrie(db Database) Trie { +func (s *stateObject) updateTrie() Trie { // Make sure all dirty slots are finalized into the pending storage area s.finalise(false) // Don't prefetch anymore, pull directly if need be if len(s.pendingStorage) == 0 { @@ -333,9 +338,12 @@ func (s *stateObject) updateTrie(db Database) Trie { defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now()) } // The snapshot storage map for the object - var storage map[common.Hash][]byte + var ( + storage map[common.Hash][]byte + origin map[common.Hash][]byte + ) // Insert all the pending updates into the trie - tr := s.getTrie(db) + tr := s.getTrie() hasher := s.db.hasher usedStorage := make([][]byte, 0, len(s.pendingStorage)) @@ -344,6 +352,7 @@ func (s *stateObject) updateTrie(db Database) Trie { if value == s.originStorage[key] { continue } + prev := s.originStorage[key] s.originStorage[key] = value var v []byte @@ -356,21 +365,39 @@ func (s *stateObject) updateTrie(db Database) Trie { s.setError(tr.TryUpdate(key[:], v)) s.db.StorageUpdated += 1 } - // If state snapshotting is active, cache the data til commit - if s.db.snap != nil { - if storage == nil { - // Retrieve the old storage map, if available, create a new one otherwise - if storage = s.db.snapStorage[s.addrHash]; storage == nil { - storage = make(map[common.Hash][]byte) - s.db.snapStorage[s.addrHash] = storage - } + // Cache the mutated storage slots until commit + if storage == nil { + // Retrieve the old storage map, if available, create a new one otherwise + if storage = s.db.storages[s.addrHash]; storage == nil { + storage = make(map[common.Hash][]byte) + s.db.storages[s.addrHash] = storage } - storage[crypto.HashData(hasher, key[:])] = v // v will be nil if it's deleted } + khash := crypto.HashData(hasher, key[:]) + storage[khash] = v // v will be nil if it's deleted + // Cache the original value of mutated storage slots + if origin == nil { + if origin = s.db.storagesOrigin[s.address]; origin == nil { + origin = make(map[common.Hash][]byte) + s.db.storagesOrigin[s.address] = origin + } + } + // Track the original value of slot only if it's mutated first time + if _, ok := origin[khash]; !ok { + if prev == (common.Hash{}) { + origin[khash] = nil // nil if it was not present previously + } else { + // Encoding []byte cannot fail, ok to ignore the error. + b, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(prev[:])) + origin[khash] = b + } + } + + // Cache the items for preloading usedStorage = append(usedStorage, common.CopyBytes(key[:])) // Copy needed for closure } if s.db.prefetcher != nil { - s.db.prefetcher.used(s.data.Root, usedStorage) + s.db.prefetcher.used(s.addrHash, s.data.Root, usedStorage) } if len(s.pendingStorage) > 0 { s.pendingStorage = make(Storage) @@ -379,9 +406,9 @@ func (s *stateObject) updateTrie(db Database) Trie { } // UpdateRoot sets the trie root to the current root hash of -func (s *stateObject) updateRoot(db Database) { +func (s *stateObject) updateRoot() { // If nothing changed, don't bother with hashing anything - if s.updateTrie(db) == nil { + if s.updateTrie() == nil { return } // Track the amount of time wasted on hashing the storage trie @@ -391,25 +418,27 @@ func (s *stateObject) updateRoot(db Database) { s.data.Root = s.trie.Hash() } -// CommitTrie the storage trie of the object to db. -// This updates the trie root. -func (s *stateObject) CommitTrie(db Database) (int, error) { +// commit returns the changes made in storage trie and updates the account data. +func (s *stateObject) commit() (*trienode.NodeSet, error) { // If nothing changed, don't bother with hashing anything - if s.updateTrie(db) == nil { - return 0, nil + if s.updateTrie() == nil { + s.origin = s.data.Copy() // Update original account data after commit + return nil, nil } if s.dbErr != nil { - return 0, s.dbErr + return nil, s.dbErr } // Track the amount of time wasted on committing the storage trie if metrics.EnabledExpensive { defer func(start time.Time) { s.db.StorageCommits += time.Since(start) }(time.Now()) } - root, committed, err := s.trie.Commit(nil) + root, nodes, err := s.trie.Commit(false) if err == nil { s.data.Root = root } - return committed, err + // Update original account data after commit + s.origin = s.data.Copy() + return nodes, err } // AddBalance adds amount to s's balance. @@ -448,7 +477,13 @@ func (s *stateObject) setBalance(amount *big.Int) { } func (s *stateObject) deepCopy(db *StateDB) *stateObject { - stateObject := newObject(db, s.address, s.data) + stateObject := &stateObject{ + db: db, + address: s.address, + addrHash: s.addrHash, + origin: s.origin, + data: s.data, + } if s.trie != nil { stateObject.trie = db.db.CopyTrie(s.trie) } @@ -472,14 +507,14 @@ func (s *stateObject) Address() common.Address { } // Code returns the contract code associated with this object, if any. -func (s *stateObject) Code(db Database) []byte { +func (s *stateObject) Code() []byte { if s.code != nil { return s.code } if bytes.Equal(s.CodeHash(), emptyCodeHash) { return nil } - code, err := db.ContractCode(s.addrHash, common.BytesToHash(s.CodeHash())) + code, err := s.db.db.ContractCode(s.addrHash, common.BytesToHash(s.CodeHash())) if err != nil { s.setError(fmt.Errorf("can't load code hash %x: %v", s.CodeHash(), err)) } @@ -505,7 +540,7 @@ func (s *stateObject) CodeSize(db Database) int { } func (s *stateObject) SetCode(codeHash common.Hash, code []byte) { - prevcode := s.Code(s.db.db) + prevcode := s.Code() s.db.journal.append(codeChange{ account: &s.address, prevhash: s.CodeHash(), diff --git a/core/state/state_test.go b/core/state/state_test.go index 6868a78b15..a66e87de00 100644 --- a/core/state/state_test.go +++ b/core/state/state_test.go @@ -23,25 +23,28 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie" ) -type stateTest struct { +type stateEnv struct { db ethdb.Database state *StateDB } -func newStateTest() *stateTest { +func newStateEnv() *stateEnv { db := rawdb.NewMemoryDatabase() sdb, _ := New(common.Hash{}, NewDatabase(db), nil) - return &stateTest{db: db, state: sdb} + return &stateEnv{db: db, state: sdb} } func TestDump(t *testing.T) { db := rawdb.NewMemoryDatabase() - sdb, _ := New(common.Hash{}, NewDatabaseWithConfig(db, nil), nil) - s := &stateTest{db: db, state: sdb} + tdb := NewDatabaseWithConfig(db, &trie.Config{Preimages: true}) + sdb, _ := New(types.EmptyRootHash, tdb, nil) + s := &stateEnv{db: db, state: sdb} // generate a few entries obj1 := s.state.GetOrNewStateObject(common.BytesToAddress([]byte{0x01})) @@ -54,9 +57,11 @@ func TestDump(t *testing.T) { // write some of them to the trie s.state.updateStateObject(obj1) s.state.updateStateObject(obj2) - s.state.Commit(false) + s.state.Commit(0, false) + root, _ := s.state.Commit(0, false) // check that DumpToCollector contains the state objects that are in trie + s.state, _ = New(root, tdb, nil) got := string(s.state.Dump(nil)) want := `{ "root": "71edff0130dd2385947095001c73d9e28d862fc286fca2b922ca6f6f3cddfdd2", @@ -91,14 +96,14 @@ func TestDump(t *testing.T) { } func TestNull(t *testing.T) { - s := newStateTest() + s := newStateEnv() address := common.HexToAddress("0x823140710bf13990e4500136726d8b55") s.state.CreateAccount(address) //value := common.FromHex("0x823140710bf13990e4500136726d8b55") var value common.Hash s.state.SetState(address, common.Hash{}, value) - s.state.Commit(false) + s.state.Commit(0, false) if value := s.state.GetState(address, common.Hash{}); value != (common.Hash{}) { t.Errorf("expected empty current value, got %x", value) @@ -113,7 +118,7 @@ func TestSnapshot(t *testing.T) { var storageaddr common.Hash data1 := common.BytesToHash([]byte{42}) data2 := common.BytesToHash([]byte{43}) - s := newStateTest() + s := newStateEnv() // snapshot the genesis state genesis := s.state.Snapshot() @@ -144,7 +149,7 @@ func TestSnapshot(t *testing.T) { } func TestSnapshotEmpty(t *testing.T) { - s := newStateTest() + s := newStateEnv() s.state.RevertToSnapshot(s.state.Snapshot()) } @@ -170,7 +175,7 @@ func TestSnapshot2(t *testing.T) { so0.deleted = false state.setStateObject(so0) - root, _ := state.Commit(false) + root, _ := state.Commit(0, false) state, _ = New(root, state.db, state.snaps) // and one with deleted == true @@ -192,8 +197,8 @@ func TestSnapshot2(t *testing.T) { so0Restored := state.getStateObject(stateobjaddr0) // Update lazily-loaded values before comparing. - so0Restored.GetState(state.db, storageaddr) - so0Restored.Code(state.db) + so0Restored.GetState(storageaddr) + so0Restored.Code() // non-deleted is equal (restored) compareStateObjects(so0Restored, so0, t) diff --git a/core/state/statedb.go b/core/state/statedb.go index de593428bc..1d5ff3b369 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -34,6 +34,8 @@ import ( "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" ) type revision struct { @@ -62,23 +64,36 @@ func (n *proofList) Delete(key []byte) error { // nested states. It's the general query interface to retrieve: // * Contracts // * Accounts +// +// Once the state is committed, tries cached in stateDB (including account +// trie, storage tries) will no longer be functional. A new state instance +// must be created with new root and updated database for accessing post- +// commit states. type StateDB struct { - db Database - prefetcher *triePrefetcher - originalRoot common.Hash // The pre-state root, before any changes were made - trie Trie - hasher crypto.KeccakState - - snaps *snapshot.Tree - snap snapshot.Snapshot - snapDestructs map[common.Hash]struct{} - snapAccounts map[common.Hash][]byte - snapStorage map[common.Hash]map[common.Hash][]byte + db Database + prefetcher *triePrefetcher + trie Trie + hasher crypto.KeccakState + + // originalRoot is the pre-state root, before any changes were made. + // It will be updated when the Commit is called. + originalRoot common.Hash + + snaps *snapshot.Tree + snap snapshot.Snapshot + + // These maps hold the state changes (including the corresponding + // original value) that occurred in this **block**. + accounts map[common.Hash][]byte // The mutated accounts in 'slim RLP' encoding + storages map[common.Hash]map[common.Hash][]byte // The mutated slots in prefix-zero trimmed rlp format + accountsOrigin map[common.Address][]byte // The original value of mutated accounts in 'slim RLP' encoding + storagesOrigin map[common.Address]map[common.Hash][]byte // The original value of mutated slots in prefix-zero trimmed rlp format // This map holds 'live' objects, which will get modified while processing a state transition. - stateObjects map[common.Address]*stateObject - stateObjectsPending map[common.Address]struct{} // State objects finalized but not yet written to the trie - stateObjectsDirty map[common.Address]struct{} // State objects modified in the current execution + stateObjects map[common.Address]*stateObject + stateObjectsPending map[common.Address]struct{} // State objects finalized but not yet written to the trie + stateObjectsDirty map[common.Address]struct{} // State objects modified in the current execution + stateObjectsDestruct map[common.Address]*types.StateAccount // State objects destructed in the block along with its previous value // DB error. // State objects are used by the consensus core and VM which are @@ -121,6 +136,7 @@ type StateDB struct { SnapshotAccountReads time.Duration SnapshotStorageReads time.Duration SnapshotCommits time.Duration + TrieDBCommits time.Duration AccountUpdated int StorageUpdated int @@ -135,26 +151,27 @@ func New(root common.Hash, db Database, snaps *snapshot.Tree) (*StateDB, error) return nil, err } sdb := &StateDB{ - db: db, - trie: tr, - originalRoot: root, - snaps: snaps, - stateObjects: make(map[common.Address]*stateObject), - stateObjectsPending: make(map[common.Address]struct{}), - stateObjectsDirty: make(map[common.Address]struct{}), - logs: make(map[common.Hash][]*types.Log), - preimages: make(map[common.Hash][]byte), - journal: newJournal(), - accessList: newAccessList(), - transientStorage: newTransientStorage(), - hasher: crypto.NewKeccakState(), + db: db, + trie: tr, + originalRoot: root, + snaps: snaps, + accounts: make(map[common.Hash][]byte), + storages: make(map[common.Hash]map[common.Hash][]byte), + accountsOrigin: make(map[common.Address][]byte), + storagesOrigin: make(map[common.Address]map[common.Hash][]byte), + stateObjects: make(map[common.Address]*stateObject), + stateObjectsPending: make(map[common.Address]struct{}), + stateObjectsDirty: make(map[common.Address]struct{}), + stateObjectsDestruct: make(map[common.Address]*types.StateAccount), + logs: make(map[common.Hash][]*types.Log), + preimages: make(map[common.Hash][]byte), + journal: newJournal(), + accessList: newAccessList(), + transientStorage: newTransientStorage(), + hasher: crypto.NewKeccakState(), } if sdb.snaps != nil { - if sdb.snap = sdb.snaps.Snapshot(root); sdb.snap != nil { - sdb.snapDestructs = make(map[common.Hash]struct{}) - sdb.snapAccounts = make(map[common.Hash][]byte) - sdb.snapStorage = make(map[common.Hash]map[common.Hash][]byte) - } + sdb.snap = sdb.snaps.Snapshot(root) } return sdb, nil } @@ -288,7 +305,7 @@ func (s *StateDB) TxIndex() int { func (s *StateDB) GetCode(addr common.Address) []byte { stateObject := s.getStateObject(addr) if stateObject != nil { - return stateObject.Code(s.db) + return stateObject.Code() } return nil } @@ -313,7 +330,7 @@ func (s *StateDB) GetCodeHash(addr common.Address) common.Hash { func (s *StateDB) GetState(addr common.Address, hash common.Hash) common.Hash { stateObject := s.getStateObject(addr) if stateObject != nil { - return stateObject.GetState(s.db, hash) + return stateObject.GetState(hash) } return common.Hash{} } @@ -345,7 +362,7 @@ func (s *StateDB) GetStorageProof(a common.Address, key common.Hash) ([][]byte, func (s *StateDB) GetCommittedState(addr common.Address, hash common.Hash) common.Hash { stateObject := s.getStateObject(addr) if stateObject != nil { - return stateObject.GetCommittedState(s.db, hash) + return stateObject.GetCommittedState(hash) } return common.Hash{} } @@ -363,8 +380,8 @@ func (s *StateDB) StorageTrie(addr common.Address) Trie { return nil } cpy := stateObject.deepCopy(s) - cpy.updateTrie(s.db) - return cpy.getTrie(s.db) + cpy.updateTrie() + return cpy.getTrie() } func (s *StateDB) HasSelfDestructed(addr common.Address) bool { @@ -419,13 +436,21 @@ func (s *StateDB) SetCode(addr common.Address, code []byte) { func (s *StateDB) SetState(addr common.Address, key, value common.Hash) { stateObject := s.GetOrNewStateObject(addr) if stateObject != nil { - stateObject.SetState(s.db, key, value) + stateObject.SetState(key, value) } } // SetStorage replaces the entire storage for the specified account with given // storage. This function should only be used for debugging. func (s *StateDB) SetStorage(addr common.Address, storage map[common.Hash]common.Hash) { + // SetStorage needs to wipe existing storage. We achieve this by pretending + // that the account self-destructed earlier in this block, by flagging + // it in stateObjectsDestruct. The effect of doing so is that storage lookups + // will not hit disk, since it is assumed that the disk-data is belonging + // to a previous incarnation of the object. + if _, ok := s.stateObjectsDestruct[addr]; !ok { + s.stateObjectsDestruct[addr] = nil + } stateObject := s.GetOrNewStateObject(addr) if stateObject != nil { stateObject.SetStorage(storage) @@ -509,12 +534,20 @@ func (s *StateDB) updateStateObject(obj *stateObject) { s.setError(fmt.Errorf("updateStateObject (%x) error: %v", addr[:], err)) } - // If state snapshotting is active, cache the data til commit. Note, this - // update mechanism is not symmetric to the deletion, because whereas it is - // enough to track account updates at commit time, deletions need tracking - // at transaction boundary level to ensure we capture state clearing. - if s.snap != nil { - s.snapAccounts[obj.addrHash] = snapshot.SlimAccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) + // Cache the data until commit. Note, this update mechanism is not symmetric + // to the deletion, because whereas it is enough to track account updates + // at commit time, deletions need tracking at transaction boundary level to + // ensure we capture state clearing. + s.accounts[obj.addrHash] = types.SlimAccountRLP(obj.data) + // Track the original value of mutated account, nil means it was not present. + // Skip if it has been tracked (because updateStateObject may be called + // multiple times in a block). + if _, ok := s.accountsOrigin[obj.address]; !ok { + if obj.origin == nil { + s.accountsOrigin[obj.address] = nil + } else { + s.accountsOrigin[obj.address] = types.SlimAccountRLP(*obj.origin) + } } } @@ -559,7 +592,7 @@ func (s *StateDB) getDeletedStateObject(addr common.Address) *stateObject { if metrics.EnabledExpensive { defer func(start time.Time) { s.SnapshotAccountReads += time.Since(start) }(time.Now()) } - var acc *snapshot.Account + var acc *types.SlimAccount if acc, err = s.snap.Account(crypto.HashData(s.hasher, addr.Bytes())); err == nil { if acc == nil { return nil @@ -598,7 +631,7 @@ func (s *StateDB) getDeletedStateObject(addr common.Address) *stateObject { } } // Insert into the live set - obj := newObject(s, addr, *data) + obj := newObject(s, addr, data) s.setStateObject(obj) return obj } @@ -619,20 +652,38 @@ func (s *StateDB) GetOrNewStateObject(addr common.Address) *stateObject { // createObject creates a new state object. If there is an existing account with // the given address, it is overwritten and returned as the second return value. func (s *StateDB) createObject(addr common.Address) (newobj, prev *stateObject) { + // The original account should be marked as destructed and all cached + // account and storage data should be cleared as well. Note, it must + // be done here, otherwise the destruction event of original one will + // be lost. prev = s.getDeletedStateObject(addr) // Note, prev might have been deleted, we need that! var prevdestruct bool - if s.snap != nil && prev != nil { - _, prevdestruct = s.snapDestructs[prev.addrHash] + if prev != nil { + _, prevdestruct = s.stateObjectsDestruct[prev.address] if !prevdestruct { - s.snapDestructs[prev.addrHash] = struct{}{} + s.stateObjectsDestruct[prev.address] = prev.origin } } - newobj = newObject(s, addr, types.StateAccount{}) + newobj = newObject(s, addr, nil) if prev == nil { s.journal.append(createObjectChange{account: &addr}) } else { - s.journal.append(resetObjectChange{prev: prev, prevdestruct: prevdestruct}) + prevAccount, ok := s.accountsOrigin[prev.address] + s.journal.append(resetObjectChange{ + account: &addr, + prev: prev, + prevdestruct: prevdestruct, + prevAccount: s.accounts[prev.addrHash], + prevStorage: s.storages[prev.addrHash], + prevAccountOriginExist: ok, + prevAccountOrigin: prevAccount, + prevStorageOrigin: s.storagesOrigin[prev.address], + }) + delete(s.accounts, prev.addrHash) + delete(s.storages, prev.addrHash) + delete(s.accountsOrigin, prev.address) + delete(s.storagesOrigin, prev.address) } newobj.created = true @@ -661,15 +712,19 @@ func (s *StateDB) CreateAccount(addr common.Address) { } } -func (db *StateDB) ForEachStorage(addr common.Address, cb func(key, value common.Hash) bool) error { - so := db.getStateObject(addr) +func (s *StateDB) ForEachStorage(addr common.Address, cb func(key, value common.Hash) bool) error { + so := s.getStateObject(addr) if so == nil { return nil } - it := trie.NewIterator(so.getTrie(db.db).NodeIterator(nil)) + trieIt, err := so.getTrie().NodeIterator(nil) + if err != nil { + return err + } + it := trie.NewIterator(trieIt) for it.Next() { - key := common.BytesToHash(db.trie.GetKey(it.Key)) + key := common.BytesToHash(s.trie.GetKey(it.Key)) if value, dirty := so.dirtyStorage[key]; dirty { if !cb(key, value) { return nil @@ -707,17 +762,30 @@ func (s *StateDB) Blacklisted(contractAddr *common.Address, addr *common.Address func (s *StateDB) Copy() *StateDB { // Copy all the basic fields, initialize the memory ones state := &StateDB{ - db: s.db, - trie: s.db.CopyTrie(s.trie), - stateObjects: make(map[common.Address]*stateObject, len(s.journal.dirties)), - stateObjectsPending: make(map[common.Address]struct{}, len(s.stateObjectsPending)), - stateObjectsDirty: make(map[common.Address]struct{}, len(s.journal.dirties)), - refund: s.refund, - logs: make(map[common.Hash][]*types.Log, len(s.logs)), - logSize: s.logSize, - preimages: make(map[common.Hash][]byte, len(s.preimages)), - journal: newJournal(), - hasher: crypto.NewKeccakState(), + db: s.db, + trie: s.db.CopyTrie(s.trie), + originalRoot: s.originalRoot, + accounts: copySet(s.accounts), + storages: copy2DSet(s.storages), + accountsOrigin: copySet(s.accountsOrigin), + storagesOrigin: copy2DSet(s.storagesOrigin), + stateObjects: make(map[common.Address]*stateObject, len(s.journal.dirties)), + stateObjectsPending: make(map[common.Address]struct{}, len(s.stateObjectsPending)), + stateObjectsDirty: make(map[common.Address]struct{}, len(s.journal.dirties)), + stateObjectsDestruct: make(map[common.Address]*types.StateAccount, len(s.stateObjectsDestruct)), + refund: s.refund, + logs: make(map[common.Hash][]*types.Log, len(s.logs)), + logSize: s.logSize, + preimages: make(map[common.Hash][]byte, len(s.preimages)), + journal: newJournal(), + hasher: crypto.NewKeccakState(), + + // In order for the block producer to be able to use and make additions + // to the snapshot tree, we need to copy that as well. Otherwise, any + // block mined by ourselves will cause gaps in the tree, and force the + // miner to operate trie-backed only. + snaps: s.snaps, + snap: s.snap, } // Copy the dirty states, logs, and preimages for addr := range s.journal.dirties { @@ -750,6 +818,12 @@ func (s *StateDB) Copy() *StateDB { } state.stateObjectsDirty[addr] = struct{}{} } + + // Deep copy the destruction markers. + for addr, value := range s.stateObjectsDestruct { + state.stateObjectsDestruct[addr] = value + } + for hash, logs := range s.logs { cpy := make([]*types.Log, len(logs)) for i, l := range logs { @@ -776,31 +850,7 @@ func (s *StateDB) Copy() *StateDB { if s.prefetcher != nil { state.prefetcher = s.prefetcher.copy() } - if s.snaps != nil { - // In order for the miner to be able to use and make additions - // to the snapshot tree, we need to copy that aswell. - // Otherwise, any block mined by ourselves will cause gaps in the tree, - // and force the miner to operate trie-backed only - state.snaps = s.snaps - state.snap = s.snap - // deep copy needed - state.snapDestructs = make(map[common.Hash]struct{}) - for k, v := range s.snapDestructs { - state.snapDestructs[k] = v - } - state.snapAccounts = make(map[common.Hash][]byte) - for k, v := range s.snapAccounts { - state.snapAccounts[k] = v - } - state.snapStorage = make(map[common.Hash]map[common.Hash][]byte) - for k, v := range s.snapStorage { - temp := make(map[common.Hash][]byte) - for kk, vv := range v { - temp[kk] = vv - } - state.snapStorage[k] = temp - } - } + return state } @@ -852,15 +902,20 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) { if obj.selfDestructed || (deleteEmptyObjects && obj.empty()) { obj.deleted = true - // If state snapshotting is active, also mark the destruction there. + // We need to maintain account deletions explicitly (will remain + // set indefinitely). Note only the first occurred self-destruct + // event is tracked. + if _, ok := s.stateObjectsDestruct[obj.address]; !ok { + s.stateObjectsDestruct[obj.address] = obj.origin + } + // Note, we can't do this only at the end of a block because multiple // transactions within the same block might self destruct and then // ressurrect an account; but the snapshotter needs both events. - if s.snap != nil { - s.snapDestructs[obj.addrHash] = struct{}{} // We need to maintain account deletions explicitly (will remain set indefinitely) - delete(s.snapAccounts, obj.addrHash) // Clear out any previously updated account data (may be recreated via a ressurrect) - delete(s.snapStorage, obj.addrHash) // Clear out any previously updated storage data (may be recreated via a ressurrect) - } + delete(s.accounts, obj.addrHash) // Clear out any previously updated account data (may be recreated via a resurrect) + delete(s.storages, obj.addrHash) // Clear out any previously updated storage data (may be recreated via a resurrect) + delete(s.accountsOrigin, obj.address) // Clear out any previously updated account data (may be recreated via a resurrect) + delete(s.storagesOrigin, obj.address) // Clear out any previously updated storage data (may be recreated via a resurrect) } else { obj.finalise(true) // Prefetch slots in the background } @@ -874,7 +929,7 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) { addressesToPrefetch = append(addressesToPrefetch, common.CopyBytes(addr[:])) // Copy needed for closure } if s.prefetcher != nil && len(addressesToPrefetch) > 0 { - s.prefetcher.prefetch(s.originalRoot, addressesToPrefetch) + s.prefetcher.prefetch(common.Hash{}, s.originalRoot, addressesToPrefetch) } // Invalidate journal because reverting across transactions is not allowed. s.clearJournalAndRefund() @@ -908,14 +963,14 @@ func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { // to pull useful data from disk. for addr := range s.stateObjectsPending { if obj := s.stateObjects[addr]; !obj.deleted { - obj.updateRoot(s.db) + obj.updateRoot() } } // Now we're about to start to write changes to the trie. The trie is so far // _untouched_. We can check with the prefetcher, if it can give us a trie // which has the same root, but also has some content loaded into it. if prefetcher != nil { - if trie := prefetcher.trie(s.originalRoot); trie != nil { + if trie := prefetcher.trie(common.Hash{}, s.originalRoot); trie != nil { s.trie = trie } } @@ -931,7 +986,7 @@ func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { usedAddrs = append(usedAddrs, common.CopyBytes(addr[:])) // Copy needed for closure } if prefetcher != nil { - prefetcher.used(s.originalRoot, usedAddrs) + prefetcher.used(common.Hash{}, s.originalRoot, usedAddrs) } if len(s.stateObjectsPending) > 0 { s.stateObjectsPending = make(map[common.Address]struct{}) @@ -943,6 +998,137 @@ func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { return s.trie.Hash() } +// deleteStorage iterates the storage trie belongs to the account and mark all +// slots inside as deleted. +func (s *StateDB) deleteStorage(addr common.Address, addrHash common.Hash, root common.Hash) (bool, map[common.Hash][]byte, *trienode.NodeSet, error) { + start := time.Now() + tr, err := s.db.OpenStorageTrie(s.originalRoot, addrHash, root) + if err != nil { + return false, nil, nil, fmt.Errorf("failed to open storage trie, err: %w", err) + } + it, err := tr.NodeIterator(nil) + if err != nil { + return false, nil, nil, fmt.Errorf("failed to create iterator, err: %w", err) + } + var ( + set = trienode.NewNodeSet(addrHash) + slots = make(map[common.Hash][]byte) + stateSize common.StorageSize + nodeSize common.StorageSize + ) + for it.Next(true) { + // arbitrary stateSize limit, make it configurable + if stateSize+nodeSize > 512*1024*1024 { + log.Info("Skip large storage deletion", "address", addr.Hex(), "states", stateSize, "nodes", nodeSize) + if metrics.EnabledExpensive { + slotDeletionSkip.Inc(1) + } + return true, nil, nil, nil + } + if it.Leaf() { + slots[common.BytesToHash(it.LeafKey())] = common.CopyBytes(it.LeafBlob()) + stateSize += common.StorageSize(common.HashLength + len(it.LeafBlob())) + continue + } + if it.Hash() == (common.Hash{}) { + continue + } + nodeSize += common.StorageSize(len(it.Path())) + set.AddNode(it.Path(), trienode.NewDeleted()) + } + if err := it.Error(); err != nil { + return false, nil, nil, err + } + if metrics.EnabledExpensive { + if int64(len(slots)) > slotDeletionMaxCount.Value() { + slotDeletionMaxCount.Update(int64(len(slots))) + } + if int64(stateSize+nodeSize) > slotDeletionMaxSize.Value() { + slotDeletionMaxSize.Update(int64(stateSize + nodeSize)) + } + slotDeletionTimer.UpdateSince(start) + slotDeletionCount.Mark(int64(len(slots))) + slotDeletionSize.Mark(int64(stateSize + nodeSize)) + } + return false, slots, set, nil +} + +// handleDestruction processes all destruction markers and deletes the account +// and associated storage slots if necessary. There are four possible situations +// here: +// +// - the account was not existent and be marked as destructed +// +// - the account was not existent and be marked as destructed, +// however, it's resurrected later in the same block. +// +// - the account was existent and be marked as destructed +// +// - the account was existent and be marked as destructed, +// however it's resurrected later in the same block. +// +// In case (a), nothing needs be deleted, nil to nil transition can be ignored. +// +// In case (b), nothing needs be deleted, nil is used as the original value for +// newly created account and storages +// +// In case (c), **original** account along with its storages should be deleted, +// with their values be tracked as original value. +// +// In case (d), **original** account along with its storages should be deleted, +// with their values be tracked as original value. +func (s *StateDB) handleDestruction(nodes *trienode.MergedNodeSet) (map[common.Address]struct{}, error) { + incomplete := make(map[common.Address]struct{}) + for addr, prev := range s.stateObjectsDestruct { + // The original account was non-existing, and it's marked as destructed + // in the scope of block. It can be case (a) or (b). + // - for (a), skip it without doing anything. + // - for (b), track account's original value as nil. It may overwrite + // the data cached in s.accountsOrigin set by 'updateStateObject'. + addrHash := crypto.Keccak256Hash(addr[:]) + if prev == nil { + if _, ok := s.accounts[addrHash]; ok { + s.accountsOrigin[addr] = nil // case (b) + } + continue + } + // It can overwrite the data in s.accountsOrigin set by 'updateStateObject'. + s.accountsOrigin[addr] = types.SlimAccountRLP(*prev) // case (c) or (d) + + // Short circuit if the storage was empty. + if prev.Root == types.EmptyRootHash { + continue + } + // Remove storage slots belong to the account. + aborted, slots, set, err := s.deleteStorage(addr, addrHash, prev.Root) + if err != nil { + return nil, fmt.Errorf("failed to delete storage, err: %w", err) + } + // The storage is too huge to handle, skip it but mark as incomplete. + // For case (d), the account is resurrected might with a few slots + // created. In this case, wipe the entire storage state diff because + // of aborted deletion. + if aborted { + incomplete[addr] = struct{}{} + delete(s.storagesOrigin, addr) + continue + } + if s.storagesOrigin[addr] == nil { + s.storagesOrigin[addr] = slots + } else { + // It can overwrite the data in s.storagesOrigin[addrHash] set by + // 'object.updateTrie'. + for key, val := range slots { + s.storagesOrigin[addr][key] = val + } + } + if err := nodes.Merge(set); err != nil { + return nil, err + } + } + return incomplete, nil +} + // SetTxContext sets the current transaction hash and index which are // used when the EVM emits new state logs. It should be invoked before // transaction execution. @@ -960,7 +1146,14 @@ func (s *StateDB) clearJournalAndRefund() { } // Commit writes the state to the underlying in-memory trie database. -func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { +// +// The associated block number of the state transition is also provided +// for more chain context. +// Once the state is committed, tries cached in stateDB (including account +// trie, storage tries) will no longer be functional. A new state instance +// must be created with new root and updated database for accessing post- +// commit states. +func (s *StateDB) Commit(block uint64, deleteEmptyObjects bool) (common.Hash, error) { if s.dbErr != nil { return common.Hash{}, fmt.Errorf("commit aborted due to earlier error: %v", s.dbErr) } @@ -968,8 +1161,21 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { s.IntermediateRoot(deleteEmptyObjects) // Commit objects to the trie, measuring the elapsed time - var storageCommitted int - codeWriter := s.db.TrieDB().DiskDB().NewBatch() + var ( + accountTrieNodesUpdated int + accountTrieNodesDeleted int + storageTrieNodesUpdated int + storageTrieNodesDeleted int + nodes = trienode.NewMergedNodeSet() + codeWriter = s.db.TrieDB().DiskDB().NewBatch() + ) + + // Handle all state deletions first + incomplete, err := s.handleDestruction(nodes) + if err != nil { + return common.Hash{}, err + } + // Handle all state updates afterwards for addr := range s.stateObjectsDirty { if obj := s.stateObjects[addr]; !obj.deleted { // Write any contract code associated with the state object @@ -978,16 +1184,22 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { obj.dirtyCode = false } // Write any storage changes in the state object to its storage trie - committed, err := obj.CommitTrie(s.db) + nodeSet, err := obj.commit() if err != nil { return common.Hash{}, err } - storageCommitted += committed + + // Merge the dirty nodes of storage trie into global set + if nodeSet != nil { + if err := nodes.Merge(nodeSet); err != nil { + return common.Hash{}, err + } + updated, deleted := nodeSet.Size() + storageTrieNodesUpdated += updated + storageTrieNodesDeleted += deleted + } } } - if len(s.stateObjectsDirty) > 0 { - s.stateObjectsDirty = make(map[common.Address]struct{}) - } if codeWriter.ValueSize() > 0 { if err := codeWriter.Write(); err != nil { log.Crit("Failed to commit dirty codes", "error", err) @@ -998,21 +1210,18 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if metrics.EnabledExpensive { start = time.Now() } - // The onleaf func is called _serially_, so we can reuse the same account - // for unmarshalling every time. - var account types.StateAccount - root, accountCommitted, err := s.trie.Commit(func(_ [][]byte, _ []byte, leaf []byte, parent common.Hash) error { - if err := rlp.DecodeBytes(leaf, &account); err != nil { - return nil - } - if account.Root != emptyRoot { - s.db.TrieDB().Reference(account.Root, parent) - } - return nil - }) + root, nodeSet, err := s.trie.Commit(true) if err != nil { return common.Hash{}, err } + + // Merge the dirty nodes of account trie into global set + if nodeSet != nil { + if err := nodes.Merge(nodeSet); err != nil { + return common.Hash{}, err + } + accountTrieNodesUpdated, accountTrieNodesDeleted = nodeSet.Size() + } if metrics.EnabledExpensive { s.AccountCommits += time.Since(start) @@ -1020,19 +1229,19 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { storageUpdatedMeter.Mark(int64(s.StorageUpdated)) accountDeletedMeter.Mark(int64(s.AccountDeleted)) storageDeletedMeter.Mark(int64(s.StorageDeleted)) - accountCommittedMeter.Mark(int64(accountCommitted)) - storageCommittedMeter.Mark(int64(storageCommitted)) + accountTrieUpdatedMeter.Mark(int64(accountTrieNodesUpdated)) + accountTrieDeletedMeter.Mark(int64(accountTrieNodesDeleted)) + storageTriesUpdatedMeter.Mark(int64(storageTrieNodesUpdated)) + storageTriesDeletedMeter.Mark(int64(storageTrieNodesDeleted)) s.AccountUpdated, s.AccountDeleted = 0, 0 s.StorageUpdated, s.StorageDeleted = 0, 0 } // If snapshotting is enabled, update the snapshot tree with this new version if s.snap != nil { - if metrics.EnabledExpensive { - defer func(start time.Time) { s.SnapshotCommits += time.Since(start) }(time.Now()) - } + start := time.Now() // Only update if there's a state transition (skip empty Clique blocks) if parent := s.snap.Root(); parent != root { - if err := s.snaps.Update(root, parent, s.snapDestructs, s.snapAccounts, s.snapStorage); err != nil { + if err := s.snaps.Update(root, parent, s.convertAccountSet(s.stateObjectsDestruct), s.accounts, s.storages); err != nil { log.Warn("Failed to update snapshot tree", "from", parent, "to", root, "err", err) } // Keep 128 diff layers in the memory, persistent layer is 129th. @@ -1043,9 +1252,39 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { log.Warn("Failed to cap snapshot tree", "root", root, "layers", 128, "err", err) } } - s.snap, s.snapDestructs, s.snapAccounts, s.snapStorage = nil, nil, nil, nil + s.snap = nil + if metrics.EnabledExpensive { + s.SnapshotCommits += time.Since(start) + } + } + + // Update Trie MergeNodeSets. + if root == (common.Hash{}) { + root = emptyRoot + } + origin := s.originalRoot + if origin == (common.Hash{}) { + origin = emptyRoot + } + if root != origin { + start := time.Now() + set := triestate.New(s.accountsOrigin, s.storagesOrigin, incomplete) + if err := s.db.TrieDB().Update(root, origin, block, nodes, set); err != nil { + return common.Hash{}, err + } + s.originalRoot = root + if metrics.EnabledExpensive { + s.TrieDBCommits += time.Since(start) + } } - return root, err + // Clear all internal flags at the end of commit operation. + s.accounts = make(map[common.Hash][]byte) + s.storages = make(map[common.Hash]map[common.Hash][]byte) + s.accountsOrigin = make(map[common.Address][]byte) + s.storagesOrigin = make(map[common.Address]map[common.Hash][]byte) + s.stateObjectsDirty = make(map[common.Address]struct{}) + s.stateObjectsDestruct = make(map[common.Address]*types.StateAccount) + return root, nil } // ResetAccessList sets access list to empty @@ -1151,3 +1390,38 @@ func (s *StateDB) DirtyAccounts(hash common.Hash, number uint64) []*types.DirtyS return dirtyAccounts } + +// convertAccountSet converts a provided account set from address keyed to hash keyed. +func (s *StateDB) convertAccountSet(set map[common.Address]*types.StateAccount) map[common.Hash]struct{} { + ret := make(map[common.Hash]struct{}, len(set)) + for addr := range set { + obj, exist := s.stateObjects[addr] + if !exist { + ret[crypto.Keccak256Hash(addr[:])] = struct{}{} + } else { + ret[obj.addrHash] = struct{}{} + } + } + return ret +} + +// copySet returns a deep-copied set. +func copySet[k comparable](set map[k][]byte) map[k][]byte { + copied := make(map[k][]byte, len(set)) + for key, val := range set { + copied[key] = common.CopyBytes(val) + } + return copied +} + +// copy2DSet returns a two-dimensional deep-copied set. +func copy2DSet[k comparable](set map[k]map[common.Hash][]byte) map[k]map[common.Hash][]byte { + copied := make(map[k]map[common.Hash][]byte, len(set)) + for addr, subset := range set { + copied[addr] = make(map[common.Hash][]byte, len(subset)) + for key, val := range subset { + copied[addr][key] = common.CopyBytes(val) + } + } + return copied +} diff --git a/core/state/statedb_fuzz_test.go b/core/state/statedb_fuzz_test.go new file mode 100644 index 0000000000..1bef564ed9 --- /dev/null +++ b/core/state/statedb_fuzz_test.go @@ -0,0 +1,364 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package state + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "math" + "math/big" + "math/rand" + "reflect" + "strings" + "testing" + "testing/quick" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triestate" +) + +// A stateTest checks that the state changes are correctly captured. Instances +// of this test with pseudorandom content are created by Generate. +// +// The test works as follows: +// +// A list of states are created by applying actions. The state changes between +// each state instance are tracked and be verified. +type stateTest struct { + addrs []common.Address // all account addresses + actions [][]testAction // modifications to the state, grouped by block + chunk int // The number of actions per chunk + err error // failure details are reported through this field +} + +// newStateTestAction creates a random action that changes state. +func newStateTestAction(addr common.Address, r *rand.Rand, index int) testAction { + actions := []testAction{ + { + name: "SetBalance", + fn: func(a testAction, s *StateDB) { + s.SetBalance(addr, big.NewInt(a.args[0])) + }, + args: make([]int64, 1), + }, + { + name: "SetNonce", + fn: func(a testAction, s *StateDB) { + s.SetNonce(addr, uint64(a.args[0])) + }, + args: make([]int64, 1), + }, + { + name: "SetState", + fn: func(a testAction, s *StateDB) { + var key, val common.Hash + binary.BigEndian.PutUint16(key[:], uint16(a.args[0])) + binary.BigEndian.PutUint16(val[:], uint16(a.args[1])) + s.SetState(addr, key, val) + }, + args: make([]int64, 2), + }, + { + name: "SetCode", + fn: func(a testAction, s *StateDB) { + code := make([]byte, 16) + binary.BigEndian.PutUint64(code, uint64(a.args[0])) + binary.BigEndian.PutUint64(code[8:], uint64(a.args[1])) + s.SetCode(addr, code) + }, + args: make([]int64, 2), + }, + { + name: "CreateAccount", + fn: func(a testAction, s *StateDB) { + s.CreateAccount(addr) + }, + }, + { + name: "Suicide", + fn: func(a testAction, s *StateDB) { + s.SelfDestruct(addr) + }, + }, + } + var nonRandom = index != -1 + if index == -1 { + index = r.Intn(len(actions)) + } + action := actions[index] + var names []string + if !action.noAddr { + names = append(names, addr.Hex()) + } + for i := range action.args { + if nonRandom { + action.args[i] = rand.Int63n(10000) + 1 // set balance to non-zero + } else { + action.args[i] = rand.Int63n(10000) + } + names = append(names, fmt.Sprint(action.args[i])) + } + action.name += " " + strings.Join(names, ", ") + return action +} + +// Generate returns a new snapshot test of the given size. All randomness is +// derived from r. +func (*stateTest) Generate(r *rand.Rand, size int) reflect.Value { + addrs := make([]common.Address, 5) + for i := range addrs { + addrs[i][0] = byte(i) + } + actions := make([][]testAction, rand.Intn(5)+1) + + for i := 0; i < len(actions); i++ { + actions[i] = make([]testAction, size) + for j := range actions[i] { + if j == 0 { + // Always include a set balance action to make sure + // the state changes are not empty. + actions[i][j] = newStateTestAction(common.HexToAddress("0xdeadbeef"), r, 0) + continue + } + actions[i][j] = newStateTestAction(addrs[r.Intn(len(addrs))], r, -1) + } + } + chunk := int(math.Sqrt(float64(size))) + if size > 0 && chunk == 0 { + chunk = 1 + } + return reflect.ValueOf(&stateTest{ + addrs: addrs, + actions: actions, + chunk: chunk, + }) +} + +func (test *stateTest) String() string { + out := new(bytes.Buffer) + for i, actions := range test.actions { + fmt.Fprintf(out, "---- block %d ----\n", i) + for j, action := range actions { + if j%test.chunk == 0 { + fmt.Fprintf(out, "---- transaction %d ----\n", j/test.chunk) + } + fmt.Fprintf(out, "%4d: %s\n", j%test.chunk, action.name) + } + } + return out.String() +} + +func (test *stateTest) run() bool { + var ( + roots []common.Hash + accountList []map[common.Address][]byte + storageList []map[common.Address]map[common.Hash][]byte + onCommit = func(states *triestate.Set) { + accountList = append(accountList, copySet(states.Accounts)) + storageList = append(storageList, copy2DSet(states.Storages)) + } + disk = rawdb.NewMemoryDatabase() + tdb = trie.NewDatabase(disk, &trie.Config{OnCommit: onCommit}) + sdb = NewDatabaseWithNodeDB(disk, tdb) + byzantium = rand.Intn(2) == 0 + ) + for i, actions := range test.actions { + root := types.EmptyRootHash + if i != 0 { + root = roots[len(roots)-1] + } + state, err := New(root, sdb, nil) + if err != nil { + panic(err) + } + for i, action := range actions { + if i%test.chunk == 0 && i != 0 { + if byzantium { + state.Finalise(true) // call finalise at the transaction boundary + } else { + state.IntermediateRoot(true) // call intermediateRoot at the transaction boundary + } + } + action.fn(action, state) + } + if byzantium { + state.Finalise(true) // call finalise at the transaction boundary + } else { + state.IntermediateRoot(true) // call intermediateRoot at the transaction boundary + } + nroot, err := state.Commit(0, true) // call commit at the block boundary + if err != nil { + panic(err) + } + if nroot == root { + return true // filter out non-change state transition + } + roots = append(roots, nroot) + } + for i := 0; i < len(test.actions); i++ { + root := types.EmptyRootHash + if i != 0 { + root = roots[i-1] + } + test.err = test.verify(root, roots[i], tdb, accountList[i], storageList[i]) + if test.err != nil { + return false + } + } + return true +} + +// verifyAccountCreation this function is called once the state diff says that +// specific account was not present. A serial of checks will be performed to +// ensure the state diff is correct, includes: +// +// - the account was indeed not present in trie +// - the account is present in new trie, nil->nil is regarded as invalid +// - the slots transition is correct +func (test *stateTest) verifyAccountCreation(next common.Hash, db *trie.Database, otr, ntr *trie.Trie, addr common.Address, slots map[common.Hash][]byte) error { + // Verify account change + addrHash := crypto.Keccak256Hash(addr.Bytes()) + oBlob := otr.Get(addrHash.Bytes()) + nBlob := ntr.Get(addrHash.Bytes()) + if len(oBlob) != 0 { + return fmt.Errorf("unexpected account in old trie, %x", addrHash) + } + if len(nBlob) == 0 { + return fmt.Errorf("missing account in new trie, %x", addrHash) + } + + // Verify storage changes + var nAcct types.StateAccount + if err := rlp.DecodeBytes(nBlob, &nAcct); err != nil { + return err + } + // Account has no slot, empty slot set is expected + if nAcct.Root == types.EmptyRootHash { + if len(slots) != 0 { + return fmt.Errorf("unexpected slot changes %x", addrHash) + } + return nil + } + // Account has slots, ensure all new slots are contained + st, err := trie.New(trie.StorageTrieID(next, addrHash, nAcct.Root), db) + if err != nil { + return err + } + for key, val := range slots { + st.Update(key.Bytes(), val) + } + if st.Hash() != types.EmptyRootHash { + return errors.New("invalid slot changes") + } + return nil +} + +// verifyAccountUpdate this function is called once the state diff says that +// specific account was present. A serial of checks will be performed to +// ensure the state diff is correct, includes: +// +// - the account was indeed present in trie +// - the account in old trie matches the provided value +// - the slots transition is correct +func (test *stateTest) verifyAccountUpdate(next common.Hash, db *trie.Database, otr, ntr *trie.Trie, addr common.Address, origin []byte, slots map[common.Hash][]byte) error { + // Verify account change + addrHash := crypto.Keccak256Hash(addr.Bytes()) + oBlob := otr.Get(addrHash.Bytes()) + nBlob := ntr.Get(addrHash.Bytes()) + if len(oBlob) == 0 { + return fmt.Errorf("missing account in old trie, %x", addrHash) + } + full, err := types.FullAccountRLP(origin) + if err != nil { + return err + } + if !bytes.Equal(full, oBlob) { + return fmt.Errorf("account value is not matched, %x", addrHash) + } + + // Decode accounts + var ( + oAcct types.StateAccount + nAcct types.StateAccount + nRoot common.Hash + ) + if err := rlp.DecodeBytes(oBlob, &oAcct); err != nil { + return err + } + if len(nBlob) == 0 { + nRoot = types.EmptyRootHash + } else { + if err := rlp.DecodeBytes(nBlob, &nAcct); err != nil { + return err + } + nRoot = nAcct.Root + } + + // Verify storage + st, err := trie.New(trie.StorageTrieID(next, addrHash, nRoot), db) + if err != nil { + return err + } + for key, val := range slots { + st.Update(key.Bytes(), val) + } + if st.Hash() != oAcct.Root { + return errors.New("invalid slot changes") + } + return nil +} + +func (test *stateTest) verify(root common.Hash, next common.Hash, db *trie.Database, accountsOrigin map[common.Address][]byte, storagesOrigin map[common.Address]map[common.Hash][]byte) error { + otr, err := trie.New(trie.StateTrieID(root), db) + if err != nil { + return err + } + ntr, err := trie.New(trie.StateTrieID(next), db) + if err != nil { + return err + } + for addr, account := range accountsOrigin { + var err error + if len(account) == 0 { + err = test.verifyAccountCreation(next, db, otr, ntr, addr, storagesOrigin[addr]) + } else { + err = test.verifyAccountUpdate(next, db, otr, ntr, addr, accountsOrigin[addr], storagesOrigin[addr]) + } + if err != nil { + return err + } + } + return nil +} + +func TestStateChanges(t *testing.T) { + config := &quick.Config{MaxCount: 1000} + err := quick.Check((*stateTest).run, config) + if cerr, ok := err.(*quick.CheckError); ok { + test := cerr.In[0].(*stateTest) + t.Errorf("%v:\n%s", test.err, test) + } else if err != nil { + t.Error(err) + } +} diff --git a/core/state/statedb_test.go b/core/state/statedb_test.go index be7d4e281e..841702f98c 100644 --- a/core/state/statedb_test.go +++ b/core/state/statedb_test.go @@ -32,6 +32,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/trie" ) // Tests that updating a state trie does not leak any database writes prior to @@ -55,7 +56,7 @@ func TestUpdateLeaks(t *testing.T) { } root := state.IntermediateRoot(false) - if err := state.Database().TrieDB().Commit(root, false, nil); err != nil { + if err := state.Database().TrieDB().Commit(root, false); err != nil { t.Errorf("can not commit trie %v to persistent database", root.Hex()) } @@ -102,19 +103,19 @@ func TestIntermediateLeaks(t *testing.T) { } // Commit and cross check the databases. - transRoot, err := transState.Commit(false) + transRoot, err := transState.Commit(0, false) if err != nil { t.Fatalf("failed to commit transition state: %v", err) } - if err = transState.Database().TrieDB().Commit(transRoot, false, nil); err != nil { + if err = transState.Database().TrieDB().Commit(transRoot, false); err != nil { t.Errorf("can not commit trie %v to persistent database", transRoot.Hex()) } - finalRoot, err := finalState.Commit(false) + finalRoot, err := finalState.Commit(0, false) if err != nil { t.Fatalf("failed to commit final state: %v", err) } - if err = finalState.Database().TrieDB().Commit(finalRoot, false, nil); err != nil { + if err = finalState.Database().TrieDB().Commit(finalRoot, false); err != nil { t.Errorf("can not commit trie %v to persistent database", finalRoot.Hex()) } @@ -481,9 +482,9 @@ func (test *snapshotTest) checkEqual(state, checkstate *StateDB) error { } func TestTouchDelete(t *testing.T) { - s := newStateTest() + s := newStateEnv() s.state.GetOrNewStateObject(common.Address{}) - root, _ := s.state.Commit(false) + root, _ := s.state.Commit(0, false) s.state, _ = New(root, s.state.db, s.state.snaps) snapshot := s.state.Snapshot() @@ -518,7 +519,8 @@ func TestCopyOfCopy(t *testing.T) { // // See https://github.com/ethereum/go-ethereum/issues/20106. func TestCopyCommitCopy(t *testing.T) { - state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase()), nil) + tdb := NewDatabase(rawdb.NewMemoryDatabase()) + state, _ := New(types.EmptyRootHash, tdb, nil) // Create an account and check if the retrieved balance is correct addr := common.HexToAddress("0xaffeaffeaffeaffeaffeaffeaffeaffeaffeaffe") @@ -556,19 +558,6 @@ func TestCopyCommitCopy(t *testing.T) { t.Fatalf("first copy pre-commit committed storage slot mismatch: have %x, want %x", val, common.Hash{}) } - copyOne.Commit(false) - if balance := copyOne.GetBalance(addr); balance.Cmp(big.NewInt(42)) != 0 { - t.Fatalf("first copy post-commit balance mismatch: have %v, want %v", balance, 42) - } - if code := copyOne.GetCode(addr); !bytes.Equal(code, []byte("hello")) { - t.Fatalf("first copy post-commit code mismatch: have %x, want %x", code, []byte("hello")) - } - if val := copyOne.GetState(addr, skey); val != sval { - t.Fatalf("first copy post-commit non-committed storage slot mismatch: have %x, want %x", val, sval) - } - if val := copyOne.GetCommittedState(addr, skey); val != sval { - t.Fatalf("first copy post-commit committed storage slot mismatch: have %x, want %x", val, sval) - } // Copy the copy and check the balance once more copyTwo := copyOne.Copy() if balance := copyTwo.GetBalance(addr); balance.Cmp(big.NewInt(42)) != 0 { @@ -580,8 +569,23 @@ func TestCopyCommitCopy(t *testing.T) { if val := copyTwo.GetState(addr, skey); val != sval { t.Fatalf("second copy non-committed storage slot mismatch: have %x, want %x", val, sval) } - if val := copyTwo.GetCommittedState(addr, skey); val != sval { - t.Fatalf("second copy post-commit committed storage slot mismatch: have %x, want %x", val, sval) + if val := copyTwo.GetCommittedState(addr, skey); val != (common.Hash{}) { + t.Fatalf("second copy committed storage slot mismatch: have %x, want %x", val, sval) + } + // Commit state, ensure states can be loaded from disk + root, _ := state.Commit(0, false) + state, _ = New(root, tdb, nil) + if balance := state.GetBalance(addr); balance.Cmp(big.NewInt(42)) != 0 { + t.Fatalf("state post-commit balance mismatch: have %v, want %v", balance, 42) + } + if code := state.GetCode(addr); !bytes.Equal(code, []byte("hello")) { + t.Fatalf("state post-commit code mismatch: have %x, want %x", code, []byte("hello")) + } + if val := state.GetState(addr, skey); val != sval { + t.Fatalf("state post-commit non-committed storage slot mismatch: have %x, want %x", val, sval) + } + if val := state.GetCommittedState(addr, skey); val != sval { + t.Fatalf("state post-commit committed storage slot mismatch: have %x, want %x", val, sval) } } @@ -641,19 +645,7 @@ func TestCopyCopyCommitCopy(t *testing.T) { if val := copyTwo.GetCommittedState(addr, skey); val != (common.Hash{}) { t.Fatalf("second copy pre-commit committed storage slot mismatch: have %x, want %x", val, common.Hash{}) } - copyTwo.Commit(false) - if balance := copyTwo.GetBalance(addr); balance.Cmp(big.NewInt(42)) != 0 { - t.Fatalf("second copy post-commit balance mismatch: have %v, want %v", balance, 42) - } - if code := copyTwo.GetCode(addr); !bytes.Equal(code, []byte("hello")) { - t.Fatalf("second copy post-commit code mismatch: have %x, want %x", code, []byte("hello")) - } - if val := copyTwo.GetState(addr, skey); val != sval { - t.Fatalf("second copy post-commit non-committed storage slot mismatch: have %x, want %x", val, sval) - } - if val := copyTwo.GetCommittedState(addr, skey); val != sval { - t.Fatalf("second copy post-commit committed storage slot mismatch: have %x, want %x", val, sval) - } + // Copy the copy-copy and check the balance once more copyThree := copyTwo.Copy() if balance := copyThree.GetBalance(addr); balance.Cmp(big.NewInt(42)) != 0 { @@ -665,11 +657,60 @@ func TestCopyCopyCommitCopy(t *testing.T) { if val := copyThree.GetState(addr, skey); val != sval { t.Fatalf("third copy non-committed storage slot mismatch: have %x, want %x", val, sval) } - if val := copyThree.GetCommittedState(addr, skey); val != sval { + if val := copyThree.GetCommittedState(addr, skey); val != (common.Hash{}) { t.Fatalf("third copy committed storage slot mismatch: have %x, want %x", val, sval) } } +// TestCommitCopy tests the copy from a committed state is not functional. +func TestCommitCopy(t *testing.T) { + state, _ := New(types.EmptyRootHash, NewDatabase(rawdb.NewMemoryDatabase()), nil) + + // Create an account and check if the retrieved balance is correct + addr := common.HexToAddress("0xaffeaffeaffeaffeaffeaffeaffeaffeaffeaffe") + skey := common.HexToHash("aaa") + sval := common.HexToHash("bbb") + + state.SetBalance(addr, big.NewInt(42)) // Change the account trie + state.SetCode(addr, []byte("hello")) // Change an external metadata + state.SetState(addr, skey, sval) // Change the storage trie + + if balance := state.GetBalance(addr); balance.Cmp(big.NewInt(42)) != 0 { + t.Fatalf("initial balance mismatch: have %v, want %v", balance, 42) + } + if code := state.GetCode(addr); !bytes.Equal(code, []byte("hello")) { + t.Fatalf("initial code mismatch: have %x, want %x", code, []byte("hello")) + } + if val := state.GetState(addr, skey); val != sval { + t.Fatalf("initial non-committed storage slot mismatch: have %x, want %x", val, sval) + } + if val := state.GetCommittedState(addr, skey); val != (common.Hash{}) { + t.Fatalf("initial committed storage slot mismatch: have %x, want %x", val, common.Hash{}) + } + // Copy the committed state database, the copied one is not functional. + state.Commit(0, true) + copied := state.Copy() + if balance := copied.GetBalance(addr); balance.Cmp(big.NewInt(0)) != 0 { + t.Fatalf("unexpected balance: have %v", balance) + } + if code := copied.GetCode(addr); code != nil { + t.Fatalf("unexpected code: have %x", code) + } + if val := copied.GetState(addr, skey); val != (common.Hash{}) { + t.Fatalf("unexpected storage slot: have %x", val) + } + if val := copied.GetCommittedState(addr, skey); val != (common.Hash{}) { + t.Fatalf("unexpected storage slot: have %x", val) + } + // Should compare based on the error message. + if !strings.Contains(copied.Error().Error(), trie.ErrCommitted.Error()) { + t.Fatalf("unexpected state error, %v", copied.Error()) + } + // if !errors.Is(copied.Error(), trie.ErrCommitted) { + // t.Fatalf("unexpected state error, %v", copied.Error()) + // } +} + // TestDeleteCreateRevert tests a weird state transition corner case that we hit // while changing the internals of StateDB. The workflow is that a contract is // self-destructed, then in a follow-up transaction (but same block) it's created @@ -685,7 +726,7 @@ func TestDeleteCreateRevert(t *testing.T) { addr := common.BytesToAddress([]byte("so")) state.SetBalance(addr, big.NewInt(1)) - root, _ := state.Commit(false) + root, _ := state.Commit(0, false) state, _ = New(root, state.db, state.snaps) // Simulate self-destructing in one transaction, then create-reverting in another @@ -697,7 +738,7 @@ func TestDeleteCreateRevert(t *testing.T) { state.RevertToSnapshot(id) // Commit the entire state and make sure we don't crash and have the correct state - root, _ = state.Commit(true) + root, _ = state.Commit(0, true) state, _ = New(root, state.db, state.snaps) if state.getStateObject(addr) != nil { @@ -722,7 +763,7 @@ func TestMissingTrieNodes(t *testing.T) { a2 := common.BytesToAddress([]byte("another")) state.SetBalance(a2, big.NewInt(100)) state.SetCode(a2, []byte{1, 2, 4}) - root, _ = state.Commit(false) + root, _ = state.Commit(0, false) t.Logf("root: %x", root) // force-flush state.Database().TrieDB().Cap(0) @@ -746,7 +787,7 @@ func TestMissingTrieNodes(t *testing.T) { } // Modify the state state.SetBalance(addr, big.NewInt(2)) - root, err := state.Commit(false) + root, err := state.Commit(0, false) if err == nil { t.Fatalf("expected error, got root :%x", root) } diff --git a/core/state/sync.go b/core/state/sync.go index 734961d9c5..104f499fb0 100644 --- a/core/state/sync.go +++ b/core/state/sync.go @@ -27,20 +27,20 @@ import ( ) // NewStateSync create a new state trie download scheduler. -func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.SyncBloom, onLeaf func(paths [][]byte, leaf []byte) error) *trie.Sync { +func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.SyncBloom, onLeaf func(keys [][]byte, leaf []byte) error, scheme string) *trie.Sync { // Register the storage slot callback if the external callback is specified. - var onSlot func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error + var onSlot func(keys [][]byte, path []byte, leaf []byte, parent common.Hash, parentPath []byte) error if onLeaf != nil { - onSlot = func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error { - return onLeaf(paths, leaf) + onSlot = func(keys [][]byte, path []byte, leaf []byte, parent common.Hash, parentPath []byte) error { + return onLeaf(keys, leaf) } } // Register the account callback to connect the state trie and the storage // trie belongs to the contract. var syncer *trie.Sync - onAccount := func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error { + onAccount := func(keys [][]byte, path []byte, leaf []byte, parent common.Hash, parentPath []byte) error { if onLeaf != nil { - if err := onLeaf(paths, leaf); err != nil { + if err := onLeaf(keys, leaf); err != nil { return err } } @@ -48,10 +48,10 @@ func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.S if err := rlp.Decode(bytes.NewReader(leaf), &obj); err != nil { return err } - syncer.AddSubTrie(obj.Root, hexpath, parent, onSlot) - syncer.AddCodeEntry(common.BytesToHash(obj.CodeHash), hexpath, parent) + syncer.AddSubTrie(obj.Root, path, parent, parentPath, onSlot) + syncer.AddCodeEntry(common.BytesToHash(obj.CodeHash), path, parent, parentPath) return nil } - syncer = trie.NewSync(root, database, onAccount, bloom) + syncer = trie.NewSync(root, database, onAccount, bloom, scheme) return syncer } diff --git a/core/state/sync_test.go b/core/state/sync_test.go index beb8fcfd9c..559f37044c 100644 --- a/core/state/sync_test.go +++ b/core/state/sync_test.go @@ -29,6 +29,8 @@ import ( "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" ) // testAccount is the data associated with an account used by the state tests. @@ -40,23 +42,28 @@ type testAccount struct { } // makeTestState create a sample test state to test node-wise reconstruction. -func makeTestState() (Database, common.Hash, []*testAccount) { +func makeTestState(scheme string) (ethdb.Database, Database, *trie.Database, common.Hash, []*testAccount) { // Create an empty state - db := NewDatabase(rawdb.NewMemoryDatabase()) - state, _ := New(common.Hash{}, db, nil) + config := &trie.Config{Preimages: true} + if scheme == rawdb.PathScheme { + config.PathDB = pathdb.Defaults + } else { + config.HashDB = hashdb.Defaults + } + db := rawdb.NewMemoryDatabase() + nodeDb := trie.NewDatabase(db, config) + sdb := NewDatabaseWithNodeDB(db, nodeDb) + state, _ := New(types.EmptyRootHash, sdb, nil) // Fill it with some arbitrary data var accounts []*testAccount for i := byte(0); i < 96; i++ { obj := state.GetOrNewStateObject(common.BytesToAddress([]byte{i})) acc := &testAccount{address: common.BytesToAddress([]byte{i})} - obj.AddBalance(big.NewInt(int64(11 * i))) acc.balance = big.NewInt(int64(11 * i)) - obj.SetNonce(uint64(42 * i)) acc.nonce = uint64(42 * i) - if i%3 == 0 { obj.SetCode(crypto.Keccak256Hash([]byte{i, i, i, i, i}), []byte{i, i, i, i, i}) acc.code = []byte{i, i, i, i, i} @@ -64,27 +71,30 @@ func makeTestState() (Database, common.Hash, []*testAccount) { if i%5 == 0 { for j := byte(0); j < 5; j++ { hash := crypto.Keccak256Hash([]byte{i, i, i, i, i, j, j}) - obj.SetState(db, hash, hash) + obj.SetState(hash, hash) } } - state.updateStateObject(obj) accounts = append(accounts, acc) } - root, _ := state.Commit(false) + root, _ := state.Commit(0, false) // Return the generated state - return db, root, accounts + return db, sdb, nodeDb, root, accounts } // checkStateAccounts cross references a reconstructed state with an expected // account array. -func checkStateAccounts(t *testing.T, db ethdb.Database, root common.Hash, accounts []*testAccount) { +func checkStateAccounts(t *testing.T, db ethdb.Database, scheme string, root common.Hash, accounts []*testAccount) { + var config trie.Config + if scheme == rawdb.PathScheme { + config.PathDB = pathdb.Defaults + } // Check root availability and state contents - state, err := New(root, NewDatabase(db), nil) + state, err := New(root, NewDatabaseWithConfig(db, &config), nil) if err != nil { t.Fatalf("failed to create state trie at %x: %v", root, err) } - if err := checkStateConsistency(db, root); err != nil { + if err := checkStateConsistency(db, scheme, root); err != nil { t.Fatalf("inconsistent state trie at %x: %v", root, err) } for i, acc := range accounts { @@ -101,27 +111,35 @@ func checkStateAccounts(t *testing.T, db ethdb.Database, root common.Hash, accou } // checkTrieConsistency checks that all nodes in a (sub-)trie are indeed present. -func checkTrieConsistency(db ethdb.Database, root common.Hash) error { +func checkTrieConsistency(db ethdb.Database, scheme string, root common.Hash) error { + config := &trie.Config{Preimages: true} + if scheme == rawdb.PathScheme { + config.PathDB = pathdb.Defaults + } if v, _ := db.Get(root[:]); v == nil { return nil // Consider a non existent state consistent. } - trie, err := trie.New(root, trie.NewDatabase(db)) + trie, err := trie.New(trie.StateTrieID(root), trie.NewDatabase(db, config)) if err != nil { return err } - it := trie.NodeIterator(nil) + it := trie.MustNodeIterator(nil) for it.Next(true) { } return it.Error() } // checkStateConsistency checks that all data of a state root is present. -func checkStateConsistency(db ethdb.Database, root common.Hash) error { - // Create and iterate a state trie rooted in a sub-node - if _, err := db.Get(root.Bytes()); err != nil { - return nil // Consider a non existent state consistent. +func checkStateConsistency(db ethdb.Database, scheme string, root common.Hash) error { + config := &trie.Config{Preimages: true} + if scheme == rawdb.PathScheme { + config.PathDB = pathdb.Defaults } - state, err := New(root, NewDatabase(db), nil) + // // Create and iterate a state trie rooted in a sub-node + // if _, err := db.Get(root.Bytes()); err != nil { + // return err // Consider a non existent state consistent. + // } + state, err := New(root, NewDatabaseWithConfig(db, config), nil) if err != nil { return err } @@ -133,8 +151,14 @@ func checkStateConsistency(db ethdb.Database, root common.Hash) error { // Tests that an empty state is not scheduled for syncing. func TestEmptyStateSync(t *testing.T) { - empty := common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") - sync := NewStateSync(empty, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New()), nil) + dbA := trie.NewDatabase(rawdb.NewMemoryDatabase(), nil) + dbB := trie.NewDatabase(rawdb.NewMemoryDatabase(), &trie.Config{PathDB: pathdb.Defaults}) + + sync := NewStateSync(types.EmptyRootHash, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New()), nil, dbA.Scheme()) + if paths, nodes, codes := sync.Missing(1); len(paths) != 0 || len(nodes) != 0 || len(codes) != 0 { + t.Errorf("content requested for empty state: %v, %v, %v", nodes, paths, codes) + } + sync = NewStateSync(types.EmptyRootHash, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New()), nil, dbB.Scheme()) if nodes, paths, codes := sync.Missing(1); len(nodes) != 0 || len(paths) != 0 || len(codes) != 0 { t.Errorf(" content requested for empty state: %v, %v, %v", nodes, paths, codes) } @@ -143,84 +167,122 @@ func TestEmptyStateSync(t *testing.T) { // Tests that given a root hash, a state can sync iteratively on a single thread, // requesting retrieval tasks and returning all of them in one go. func TestIterativeStateSyncIndividual(t *testing.T) { - testIterativeStateSync(t, 1, false, false) + + testIterativeStateSync(t, 1, false, false, rawdb.HashScheme) + testIterativeStateSync(t, 1, false, false, rawdb.PathScheme) } func TestIterativeStateSyncBatched(t *testing.T) { - testIterativeStateSync(t, 100, false, false) + testIterativeStateSync(t, 100, false, false, rawdb.HashScheme) + testIterativeStateSync(t, 100, false, false, rawdb.PathScheme) } func TestIterativeStateSyncIndividualFromDisk(t *testing.T) { - testIterativeStateSync(t, 1, true, false) + testIterativeStateSync(t, 1, true, false, rawdb.HashScheme) + testIterativeStateSync(t, 1, true, false, rawdb.PathScheme) } func TestIterativeStateSyncBatchedFromDisk(t *testing.T) { - testIterativeStateSync(t, 100, true, false) + testIterativeStateSync(t, 100, true, false, rawdb.HashScheme) + testIterativeStateSync(t, 100, true, false, rawdb.PathScheme) } func TestIterativeStateSyncIndividualByPath(t *testing.T) { - testIterativeStateSync(t, 1, false, true) + testIterativeStateSync(t, 1, false, true, rawdb.HashScheme) + testIterativeStateSync(t, 1, false, true, rawdb.PathScheme) } func TestIterativeStateSyncBatchedByPath(t *testing.T) { - testIterativeStateSync(t, 100, false, true) + testIterativeStateSync(t, 100, false, true, rawdb.HashScheme) + testIterativeStateSync(t, 100, false, true, rawdb.PathScheme) +} + +// stateElement represents the element in the state trie(bytecode or trie node). +type stateElement struct { + path string + hash common.Hash + code common.Hash + syncPath trie.SyncPath } -func testIterativeStateSync(t *testing.T, count int, commit bool, bypath bool) { +func testIterativeStateSync(t *testing.T, count int, commit bool, bypath bool, scheme string) { // Create a random state to copy - srcDb, srcRoot, srcAccounts := makeTestState() + _, srcDb, ndb, srcRoot, srcAccounts := makeTestState(scheme) if commit { - srcDb.TrieDB().Commit(srcRoot, false, nil) + ndb.Commit(srcRoot, false) } - srcTrie, _ := trie.New(srcRoot, srcDb.TrieDB()) + srcTrie, _ := trie.New(trie.StateTrieID(srcRoot), ndb) // Create a destination state and sync with the scheduler dstDb := rawdb.NewMemoryDatabase() - sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil) + sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil, ndb.Scheme()) - nodes, paths, codes := sched.Missing(count) var ( - hashQueue []common.Hash - pathQueue []trie.SyncPath + nodeElements []stateElement + codeElements []stateElement ) - if !bypath { - hashQueue = append(append(hashQueue[:0], nodes...), codes...) - } else { - hashQueue = append(hashQueue[:0], codes...) - pathQueue = append(pathQueue[:0], paths...) + paths, nodes, codes := sched.Missing(count) + for i := 0; i < len(paths); i++ { + nodeElements = append(nodeElements, stateElement{ + path: paths[i], + hash: nodes[i], + syncPath: trie.NewSyncPath([]byte(paths[i])), + }) } - for len(hashQueue)+len(pathQueue) > 0 { - results := make([]trie.SyncResult, len(hashQueue)+len(pathQueue)) - for i, hash := range hashQueue { - data, err := srcDb.TrieDB().Node(hash) - if err != nil { - data, err = srcDb.ContractCode(common.Hash{}, hash) - } + for i := 0; i < len(codes); i++ { + codeElements = append(codeElements, stateElement{code: codes[i]}) + } + reader, err := ndb.Reader(srcRoot) + if err != nil { + t.Fatalf("failed to create reader for root %x: %v", srcRoot, err) + } + for len(nodeElements)+len(codeElements) > 0 { + var ( + nodeResults = make([]trie.NodeSyncResult, len(nodeElements)) + codeResults = make([]trie.CodeSyncResult, len(codeElements)) + ) + for i, element := range codeElements { + data, err := srcDb.ContractCode(common.Hash{}, element.code) if err != nil { - t.Fatalf("failed to retrieve node data for hash %x", hash) + t.Fatalf("failed to retrieve contract bytecode for hash %x", element.code) } - results[i] = trie.SyncResult{Hash: hash, Data: data} + codeResults[i] = trie.CodeSyncResult{Hash: element.code, Data: data} } - for i, path := range pathQueue { - if len(path) == 1 { - data, _, err := srcTrie.TryGetNode(path[0]) - if err != nil { - t.Fatalf("failed to retrieve node data for path %x: %v", path, err) + for i, node := range nodeElements { + if bypath { + if len(node.syncPath) == 1 { + data, _, err := srcTrie.TryGetNode(node.syncPath[0]) + if err != nil { + t.Fatalf("failed to retrieve node data for path %x: %v", node.syncPath[0], err) + } + nodeResults[i] = trie.NodeSyncResult{Path: node.path, Data: data} + } else { + var acc types.StateAccount + if err := rlp.DecodeBytes(srcTrie.Get(node.syncPath[0]), &acc); err != nil { + t.Fatalf("failed to decode account on path %x: %v", node.syncPath[0], err) + } + stTrie, err := trie.New(trie.StorageTrieID(srcRoot, common.BytesToHash(node.syncPath[0]), acc.Root), ndb) + if err != nil { + t.Fatalf("failed to retriev storage trie for path %x: %v", node.syncPath[1], err) + } + data, _, err := stTrie.TryGetNode(node.syncPath[1]) + if err != nil { + t.Fatalf("failed to retrieve node data for path %x: %v", node.syncPath[1], err) + } + nodeResults[i] = trie.NodeSyncResult{Path: node.path, Data: data} } - results[len(hashQueue)+i] = trie.SyncResult{Hash: crypto.Keccak256Hash(data), Data: data} } else { - var acc types.StateAccount - if err := rlp.DecodeBytes(srcTrie.Get(path[0]), &acc); err != nil { - t.Fatalf("failed to decode account on path %x: %v", path, err) - } - stTrie, err := trie.New(acc.Root, srcDb.TrieDB()) - if err != nil { - t.Fatalf("failed to retriev storage trie for path %x: %v", path, err) - } - data, _, err := stTrie.TryGetNode(path[1]) + owner, inner := trie.ResolvePath([]byte(node.path)) + + data, err := reader.Node(owner, inner, node.hash) if err != nil { - t.Fatalf("failed to retrieve node data for path %x: %v", path, err) + t.Fatalf("failed to retrieve node data for key %v", []byte(node.path)) } - results[len(hashQueue)+i] = trie.SyncResult{Hash: crypto.Keccak256Hash(data), Data: data} + nodeResults[i] = trie.NodeSyncResult{Path: node.path, Data: data} + } + } + for _, result := range codeResults { + if err := sched.ProcessCode(result); err != nil { + t.Errorf("failed to process result %v", err) } } - for _, result := range results { - if err := sched.Process(result); err != nil { + for _, result := range nodeResults { + if err := sched.ProcessNode(result); err != nil { t.Errorf("failed to process result %v", err) } } @@ -230,48 +292,97 @@ func testIterativeStateSync(t *testing.T, count int, commit bool, bypath bool) { } batch.Write() - nodes, paths, codes = sched.Missing(count) - if !bypath { - hashQueue = append(append(hashQueue[:0], nodes...), codes...) - } else { - hashQueue = append(hashQueue[:0], codes...) - pathQueue = append(pathQueue[:0], paths...) + paths, nodes, codes = sched.Missing(count) + nodeElements = nodeElements[:0] + for i := 0; i < len(paths); i++ { + nodeElements = append(nodeElements, stateElement{ + path: paths[i], + hash: nodes[i], + syncPath: trie.NewSyncPath([]byte(paths[i])), + }) + } + codeElements = codeElements[:0] + for i := 0; i < len(codes); i++ { + codeElements = append(codeElements, stateElement{ + code: codes[i], + }) } } // Cross check that the two states are in sync - checkStateAccounts(t, dstDb, srcRoot, srcAccounts) + checkStateAccounts(t, dstDb, ndb.Scheme(), srcRoot, srcAccounts) } // Tests that the trie scheduler can correctly reconstruct the state even if only // partial results are returned, and the others sent only later. func TestIterativeDelayedStateSync(t *testing.T) { + testIterativeDelayedStateSync(t, rawdb.HashScheme) + testIterativeDelayedStateSync(t, rawdb.PathScheme) +} + +func testIterativeDelayedStateSync(t *testing.T, scheme string) { // Create a random state to copy - srcDb, srcRoot, srcAccounts := makeTestState() + _, srcDb, ndb, srcRoot, srcAccounts := makeTestState(scheme) // Create a destination state and sync with the scheduler dstDb := rawdb.NewMemoryDatabase() - sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil) + sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil, ndb.Scheme()) - nodes, _, codes := sched.Missing(0) - queue := append(append([]common.Hash{}, nodes...), codes...) + var ( + nodeElements []stateElement + codeElements []stateElement + ) + paths, nodes, codes := sched.Missing(0) + for i := 0; i < len(paths); i++ { + nodeElements = append(nodeElements, stateElement{ + path: paths[i], + hash: nodes[i], + syncPath: trie.NewSyncPath([]byte(paths[i])), + }) + } + for i := 0; i < len(codes); i++ { + codeElements = append(codeElements, stateElement{code: codes[i]}) + } + reader, err := ndb.Reader(srcRoot) + if err != nil { + t.Fatalf("failed to create reader for root %x: %v", srcRoot, err) + } - for len(queue) > 0 { + for len(nodeElements)+len(codeElements) > 0 { // Sync only half of the scheduled nodes - results := make([]trie.SyncResult, len(queue)/2+1) - for i, hash := range queue[:len(results)] { - data, err := srcDb.TrieDB().Node(hash) - if err != nil { - data, err = srcDb.ContractCode(common.Hash{}, hash) + var nodeProcessd int + var codeProcessd int + if len(codeElements) > 0 { + codeResults := make([]trie.CodeSyncResult, len(codeElements)/2+1) + for i, element := range codeElements[:len(codeResults)] { + data, err := srcDb.ContractCode(common.Hash{}, element.code) + if err != nil { + t.Fatalf("failed to retrieve contract bytecode for %x", element.code) + } + codeResults[i] = trie.CodeSyncResult{Hash: element.code, Data: data} } - if err != nil { - t.Fatalf("failed to retrieve node data for %x", hash) + for _, result := range codeResults { + if err := sched.ProcessCode(result); err != nil { + t.Fatalf("failed to process result %v", err) + } } - results[i] = trie.SyncResult{Hash: hash, Data: data} + codeProcessd = len(codeResults) } - for _, result := range results { - if err := sched.Process(result); err != nil { - t.Fatalf("failed to process result %v", err) + if len(nodeElements) > 0 { + nodeResults := make([]trie.NodeSyncResult, len(nodeElements)/2+1) + for i, element := range nodeElements[:len(nodeResults)] { + owner, inner := trie.ResolvePath([]byte(element.path)) + data, err := reader.Node(owner, inner, element.hash) + if err != nil { + t.Fatalf("failed to retrieve contract bytecode for %x", element.code) + } + nodeResults[i] = trie.NodeSyncResult{Path: element.path, Data: data} + } + for _, result := range nodeResults { + if err := sched.ProcessNode(result); err != nil { + t.Fatalf("failed to process result %v", err) + } } + nodeProcessd = len(nodeResults) } batch := dstDb.NewBatch() if err := sched.Commit(batch); err != nil { @@ -279,105 +390,197 @@ func TestIterativeDelayedStateSync(t *testing.T) { } batch.Write() - nodes, _, codes = sched.Missing(0) - queue = append(append(queue[len(results):], nodes...), codes...) + paths, nodes, codes = sched.Missing(0) + nodeElements = nodeElements[nodeProcessd:] + for i := 0; i < len(paths); i++ { + nodeElements = append(nodeElements, stateElement{ + path: paths[i], + hash: nodes[i], + syncPath: trie.NewSyncPath([]byte(paths[i])), + }) + } + codeElements = codeElements[codeProcessd:] + for i := 0; i < len(codes); i++ { + codeElements = append(codeElements, stateElement{ + code: codes[i], + }) + } } // Cross check that the two states are in sync - checkStateAccounts(t, dstDb, srcRoot, srcAccounts) + checkStateAccounts(t, dstDb, ndb.Scheme(), srcRoot, srcAccounts) } // Tests that given a root hash, a trie can sync iteratively on a single thread, // requesting retrieval tasks and returning all of them in one go, however in a // random order. -func TestIterativeRandomStateSyncIndividual(t *testing.T) { testIterativeRandomStateSync(t, 1) } -func TestIterativeRandomStateSyncBatched(t *testing.T) { testIterativeRandomStateSync(t, 100) } +func TestIterativeRandomStateSyncIndividual(t *testing.T) { + testIterativeRandomStateSync(t, 1, rawdb.HashScheme) + testIterativeRandomStateSync(t, 1, rawdb.PathScheme) +} +func TestIterativeRandomStateSyncBatched(t *testing.T) { + testIterativeRandomStateSync(t, 100, rawdb.HashScheme) + testIterativeRandomStateSync(t, 100, rawdb.PathScheme) +} -func testIterativeRandomStateSync(t *testing.T, count int) { +func testIterativeRandomStateSync(t *testing.T, count int, scheme string) { // Create a random state to copy - srcDb, srcRoot, srcAccounts := makeTestState() + _, srcDb, ndb, srcRoot, srcAccounts := makeTestState(scheme) // Create a destination state and sync with the scheduler dstDb := rawdb.NewMemoryDatabase() - sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil) - - queue := make(map[common.Hash]struct{}) - nodes, _, codes := sched.Missing(count) - for _, hash := range append(nodes, codes...) { - queue[hash] = struct{}{} + sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil, ndb.Scheme()) + + nodeQueue := make(map[string]stateElement) + codeQueue := make(map[common.Hash]struct{}) + paths, nodes, codes := sched.Missing(count) + for i, path := range paths { + nodeQueue[path] = stateElement{ + path: path, + hash: nodes[i], + syncPath: trie.NewSyncPath([]byte(path)), + } + } + for _, hash := range codes { + codeQueue[hash] = struct{}{} + } + reader, err := ndb.Reader(srcRoot) + if err != nil { + t.Fatalf("failed to create reader for root %x: %v", srcRoot, err) } - for len(queue) > 0 { + for len(nodeQueue)+len(codeQueue) > 0 { // Fetch all the queued nodes in a random order - results := make([]trie.SyncResult, 0, len(queue)) - for hash := range queue { - data, err := srcDb.TrieDB().Node(hash) - if err != nil { - data, err = srcDb.ContractCode(common.Hash{}, hash) + if len(codeQueue) > 0 { + results := make([]trie.CodeSyncResult, 0, len(codeQueue)) + for hash := range codeQueue { + data, err := srcDb.ContractCode(common.Hash{}, hash) + if err != nil { + t.Fatalf("failed to retrieve node data for %x", hash) + } + results = append(results, trie.CodeSyncResult{Hash: hash, Data: data}) } - if err != nil { - t.Fatalf("failed to retrieve node data for %x", hash) + for _, result := range results { + if err := sched.ProcessCode(result); err != nil { + t.Fatalf("failed to process result %v", err) + } } - results = append(results, trie.SyncResult{Hash: hash, Data: data}) } - // Feed the retrieved results back and queue new tasks - for _, result := range results { - if err := sched.Process(result); err != nil { - t.Fatalf("failed to process result %v", err) + if len(nodeQueue) > 0 { + results := make([]trie.NodeSyncResult, 0, len(nodeQueue)) + for path, element := range nodeQueue { + owner, inner := trie.ResolvePath([]byte(element.path)) + data, err := reader.Node(owner, inner, element.hash) + if err != nil { + t.Fatalf("failed to retrieve node data for %x %v %v", element.hash, []byte(element.path), element.path) + } + results = append(results, trie.NodeSyncResult{Path: path, Data: data}) + } + for _, result := range results { + if err := sched.ProcessNode(result); err != nil { + t.Fatalf("failed to process result %v", err) + } } } + // Feed the retrieved results back and queue new tasks batch := dstDb.NewBatch() if err := sched.Commit(batch); err != nil { t.Fatalf("failed to commit data: %v", err) } batch.Write() - queue = make(map[common.Hash]struct{}) - nodes, _, codes = sched.Missing(count) - for _, hash := range append(nodes, codes...) { - queue[hash] = struct{}{} + nodeQueue = make(map[string]stateElement) + codeQueue = make(map[common.Hash]struct{}) + paths, nodes, codes := sched.Missing(count) + for i, path := range paths { + nodeQueue[path] = stateElement{ + path: path, + hash: nodes[i], + syncPath: trie.NewSyncPath([]byte(path)), + } + } + for _, hash := range codes { + codeQueue[hash] = struct{}{} } } // Cross check that the two states are in sync - checkStateAccounts(t, dstDb, srcRoot, srcAccounts) + checkStateAccounts(t, dstDb, ndb.Scheme(), srcRoot, srcAccounts) } // Tests that the trie scheduler can correctly reconstruct the state even if only // partial results are returned (Even those randomly), others sent only later. func TestIterativeRandomDelayedStateSync(t *testing.T) { + testIterativeRandomDelayedStateSync(t, rawdb.HashScheme) + testIterativeRandomDelayedStateSync(t, rawdb.PathScheme) +} + +func testIterativeRandomDelayedStateSync(t *testing.T, scheme string) { // Create a random state to copy - srcDb, srcRoot, srcAccounts := makeTestState() + _, srcDb, ndb, srcRoot, srcAccounts := makeTestState(scheme) // Create a destination state and sync with the scheduler dstDb := rawdb.NewMemoryDatabase() - sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil) - - queue := make(map[common.Hash]struct{}) - nodes, _, codes := sched.Missing(0) - for _, hash := range append(nodes, codes...) { - queue[hash] = struct{}{} + sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil, ndb.Scheme()) + + nodeQueue := make(map[string]stateElement) + codeQueue := make(map[common.Hash]struct{}) + paths, nodes, codes := sched.Missing(0) + for i, path := range paths { + nodeQueue[path] = stateElement{ + path: path, + hash: nodes[i], + syncPath: trie.NewSyncPath([]byte(path)), + } + } + for _, hash := range codes { + codeQueue[hash] = struct{}{} + } + reader, err := ndb.Reader(srcRoot) + if err != nil { + t.Fatalf("failed to create reader for root %x: %v", srcRoot, err) } - for len(queue) > 0 { + for len(nodeQueue)+len(codeQueue) > 0 { // Sync only half of the scheduled nodes, even those in random order - results := make([]trie.SyncResult, 0, len(queue)/2+1) - for hash := range queue { - delete(queue, hash) + if len(codeQueue) > 0 { + results := make([]trie.CodeSyncResult, 0, len(codeQueue)/2+1) + for hash := range codeQueue { + delete(codeQueue, hash) - data, err := srcDb.TrieDB().Node(hash) - if err != nil { - data, err = srcDb.ContractCode(common.Hash{}, hash) + data, err := srcDb.ContractCode(common.Hash{}, hash) + if err != nil { + t.Fatalf("failed to retrieve node data for %x", hash) + } + results = append(results, trie.CodeSyncResult{Hash: hash, Data: data}) + + if len(results) >= cap(results) { + break + } } - if err != nil { - t.Fatalf("failed to retrieve node data for %x", hash) + for _, result := range results { + if err := sched.ProcessCode(result); err != nil { + t.Fatalf("failed to process result %v", err) + } } - results = append(results, trie.SyncResult{Hash: hash, Data: data}) + } + if len(nodeQueue) > 0 { + results := make([]trie.NodeSyncResult, 0, len(nodeQueue)/2+1) + for path, element := range nodeQueue { + delete(nodeQueue, path) - if len(results) >= cap(results) { - break + owner, inner := trie.ResolvePath([]byte(element.path)) + data, err := reader.Node(owner, inner, element.hash) + if err != nil { + t.Fatalf("failed to retrieve node data for %x", element.hash) + } + results = append(results, trie.NodeSyncResult{Path: path, Data: data}) + + if len(results) >= cap(results) { + break + } } - } - // Feed the retrieved results back and queue new tasks - for _, result := range results { - if err := sched.Process(result); err != nil { - t.Fatalf("failed to process result %v", err) + // Feed the retrieved results back and queue new tasks + for _, result := range results { + if err := sched.ProcessNode(result); err != nil { + t.Fatalf("failed to process result %v", err) + } } } batch := dstDb.NewBatch() @@ -385,23 +588,33 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) { t.Fatalf("failed to commit data: %v", err) } batch.Write() - for _, result := range results { - delete(queue, result.Hash) + + paths, nodes, codes := sched.Missing(0) + for i, path := range paths { + nodeQueue[path] = stateElement{ + path: path, + hash: nodes[i], + syncPath: trie.NewSyncPath([]byte(path)), + } } - nodes, _, codes = sched.Missing(0) - for _, hash := range append(nodes, codes...) { - queue[hash] = struct{}{} + for _, hash := range codes { + codeQueue[hash] = struct{}{} } } // Cross check that the two states are in sync - checkStateAccounts(t, dstDb, srcRoot, srcAccounts) + checkStateAccounts(t, dstDb, ndb.Scheme(), srcRoot, srcAccounts) } // Tests that at any point in time during a sync, only complete sub-tries are in // the database. func TestIncompleteStateSync(t *testing.T) { + testIncompleteStateSync(t, rawdb.HashScheme) + testIncompleteStateSync(t, rawdb.PathScheme) +} + +func testIncompleteStateSync(t *testing.T, scheme string) { // Create a random state to copy - srcDb, srcRoot, srcAccounts := makeTestState() + db, srcDb, ndb, srcRoot, srcAccounts := makeTestState(scheme) // isCodeLookup to save some hashing var isCode = make(map[common.Hash]struct{}) @@ -411,34 +624,78 @@ func TestIncompleteStateSync(t *testing.T) { } } isCode[common.BytesToHash(emptyCodeHash)] = struct{}{} - checkTrieConsistency(srcDb.TrieDB().DiskDB().(ethdb.Database), srcRoot) + checkTrieConsistency(db, ndb.Scheme(), srcRoot) // Create a destination state and sync with the scheduler dstDb := rawdb.NewMemoryDatabase() - sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil) - - var added []common.Hash + sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil, srcDb.TrieDB().Scheme()) - nodes, _, codes := sched.Missing(1) - queue := append(append([]common.Hash{}, nodes...), codes...) - - for len(queue) > 0 { + var ( + addedCodes []common.Hash + addedPaths []string + addedHashes []common.Hash + ) + reader, err := ndb.Reader(srcRoot) + if err != nil { + t.Fatalf("state is not available %x", srcRoot) + } + nodeQueue := make(map[string]stateElement) + codeQueue := make(map[common.Hash]struct{}) + paths, nodes, codes := sched.Missing(1) + for i, path := range paths { + nodeQueue[path] = stateElement{ + path: path, + hash: nodes[i], + syncPath: trie.NewSyncPath([]byte(path)), + } + } + for _, hash := range codes { + codeQueue[hash] = struct{}{} + } + if err != nil { + t.Fatalf("failed to create reader for root %x: %v", srcRoot, err) + } + for len(nodeQueue)+len(codeQueue) > 0 { // Fetch a batch of state nodes - results := make([]trie.SyncResult, len(queue)) - for i, hash := range queue { - data, err := srcDb.TrieDB().Node(hash) - if err != nil { - data, err = srcDb.ContractCode(common.Hash{}, hash) + if len(codeQueue) > 0 { + results := make([]trie.CodeSyncResult, 0, len(codeQueue)) + for hash := range codeQueue { + data, err := srcDb.ContractCode(common.Hash{}, hash) + if err != nil { + t.Fatalf("failed to retrieve node data for %x", hash) + } + results = append(results, trie.CodeSyncResult{Hash: hash, Data: data}) + addedCodes = append(addedCodes, hash) } - if err != nil { - t.Fatalf("failed to retrieve node data for %x", hash) + // Process each of the state nodes + for _, result := range results { + if err := sched.ProcessCode(result); err != nil { + t.Fatalf("failed to process result %v", err) + } } - results[i] = trie.SyncResult{Hash: hash, Data: data} } - // Process each of the state nodes - for _, result := range results { - if err := sched.Process(result); err != nil { - t.Fatalf("failed to process result %v", err) + var nodehashes []common.Hash + if len(nodeQueue) > 0 { + results := make([]trie.NodeSyncResult, 0, len(nodeQueue)) + for path, element := range nodeQueue { + owner, inner := trie.ResolvePath([]byte(element.path)) + data, err := reader.Node(owner, inner, element.hash) + if err != nil { + t.Fatalf("failed to retrieve node data for %x", element.hash) + } + results = append(results, trie.NodeSyncResult{Path: path, Data: data}) + + if element.hash != srcRoot { + addedPaths = append(addedPaths, element.path) + addedHashes = append(addedHashes, element.hash) + } + nodehashes = append(nodehashes, element.hash) + } + // Process each of the state nodes + for _, result := range results { + if err := sched.ProcessNode(result); err != nil { + t.Fatalf("failed to process result %v", err) + } } } batch := dstDb.NewBatch() @@ -446,43 +703,52 @@ func TestIncompleteStateSync(t *testing.T) { t.Fatalf("failed to commit data: %v", err) } batch.Write() - for _, result := range results { - added = append(added, result.Hash) - // Check that all known sub-tries added so far are complete or missing entirely. - if _, ok := isCode[result.Hash]; ok { - continue - } + + for _, root := range nodehashes { // Can't use checkStateConsistency here because subtrie keys may have odd // length and crash in LeafKey. - if err := checkTrieConsistency(dstDb, result.Hash); err != nil { + if err := checkTrieConsistency(dstDb, scheme, root); err != nil { t.Fatalf("state inconsistent: %v", err) } } // Fetch the next batch to retrieve - nodes, _, codes = sched.Missing(1) - queue = append(append(queue[:0], nodes...), codes...) + nodeQueue = make(map[string]stateElement) + codeQueue = make(map[common.Hash]struct{}) + paths, nodes, codes := sched.Missing(1) + for i, path := range paths { + nodeQueue[path] = stateElement{ + path: path, + hash: nodes[i], + syncPath: trie.NewSyncPath([]byte(path)), + } + } + for _, hash := range codes { + codeQueue[hash] = struct{}{} + } } // Sanity check that removing any node from the database is detected - for _, node := range added[1:] { - var ( - key = node.Bytes() - _, code = isCode[node] - val []byte - ) - if code { - val = rawdb.ReadCode(dstDb, node) - rawdb.DeleteCode(dstDb, node) - } else { - val = rawdb.ReadTrieNode(dstDb, node) - rawdb.DeleteTrieNode(dstDb, node) + for _, node := range addedCodes { + val := rawdb.ReadCode(dstDb, node) + rawdb.DeleteCode(dstDb, node) + if err := checkStateConsistency(dstDb, ndb.Scheme(), srcRoot); err == nil { + t.Errorf("trie inconsistency not caught, missing: %x", node) } - if err := checkStateConsistency(dstDb, added[0]); err == nil { - t.Fatalf("trie inconsistency not caught, missing: %x", key) + rawdb.WriteCode(dstDb, node, val) + } + + for i, path := range addedPaths { + owner, inner := trie.ResolvePath([]byte(path)) + hash := addedHashes[i] + val := rawdb.ReadTrieNode(dstDb, owner, inner, hash, scheme) + if val == nil { + t.Error("missing trie node") } - if code { - rawdb.WriteCode(dstDb, node, val) - } else { - rawdb.WriteTrieNode(dstDb, node, val) + rawdb.DeleteTrieNode(dstDb, owner, inner, hash, scheme) + + if err := checkStateConsistency(dstDb, ndb.Scheme(), srcRoot); err == nil { + t.Errorf("trie inconsistency not caught, missing: %v", path) + } + rawdb.WriteTrieNode(dstDb, owner, inner, hash, val, scheme) } } diff --git a/core/state/trie_prefetcher.go b/core/state/trie_prefetcher.go index 472c125b77..5c85e5adc5 100644 --- a/core/state/trie_prefetcher.go +++ b/core/state/trie_prefetcher.go @@ -25,7 +25,7 @@ import ( ) var ( - // triePrefetchMetricsPrefix is the prefix under which to publis the metrics. + // triePrefetchMetricsPrefix is the prefix under which to publish the metrics. triePrefetchMetricsPrefix = "trie/prefetch/" ) @@ -35,10 +35,10 @@ var ( // // Note, the prefetcher's API is not thread safe. type triePrefetcher struct { - db Database // Database to fetch trie nodes through - root common.Hash // Root hash of theaccount trie for metrics - fetches map[common.Hash]Trie // Partially or fully fetcher tries - fetchers map[common.Hash]*subfetcher // Subfetchers for each trie + db Database // Database to fetch trie nodes through + root common.Hash // Root hash of the account trie for metrics + fetches map[string]Trie // Partially or fully fetcher tries + fetchers map[string]*subfetcher // Subfetchers for each trie deliveryMissMeter metrics.Meter accountLoadMeter metrics.Meter @@ -51,13 +51,12 @@ type triePrefetcher struct { storageWasteMeter metrics.Meter } -// newTriePrefetcher func newTriePrefetcher(db Database, root common.Hash, namespace string) *triePrefetcher { prefix := triePrefetchMetricsPrefix + namespace p := &triePrefetcher{ db: db, root: root, - fetchers: make(map[common.Hash]*subfetcher), // Active prefetchers use the fetchers map + fetchers: make(map[string]*subfetcher), // Active prefetchers use the fetchers map deliveryMissMeter: metrics.GetOrRegisterMeter(prefix+"/deliverymiss", nil), accountLoadMeter: metrics.GetOrRegisterMeter(prefix+"/account/load", nil), @@ -112,7 +111,7 @@ func (p *triePrefetcher) copy() *triePrefetcher { copy := &triePrefetcher{ db: p.db, root: p.root, - fetches: make(map[common.Hash]Trie), // Active prefetchers use the fetches map + fetches: make(map[string]Trie), // Active prefetchers use the fetches map deliveryMissMeter: p.deliveryMissMeter, accountLoadMeter: p.accountLoadMeter, @@ -135,33 +134,35 @@ func (p *triePrefetcher) copy() *triePrefetcher { return copy } // Otherwise we're copying an active fetcher, retrieve the current states - for root, fetcher := range p.fetchers { - copy.fetches[root] = fetcher.peek() + for id, fetcher := range p.fetchers { + copy.fetches[id] = fetcher.peek() } return copy } // prefetch schedules a batch of trie items to prefetch. -func (p *triePrefetcher) prefetch(root common.Hash, keys [][]byte) { +func (p *triePrefetcher) prefetch(owner common.Hash, root common.Hash, keys [][]byte) { // If the prefetcher is an inactive one, bail out if p.fetches != nil { return } // Active fetcher, schedule the retrievals - fetcher := p.fetchers[root] + id := p.trieID(owner, root) + fetcher := p.fetchers[id] if fetcher == nil { - fetcher = newSubfetcher(p.db, root) - p.fetchers[root] = fetcher + fetcher = newSubfetcher(p.db, p.root, owner, root) + p.fetchers[id] = fetcher } fetcher.schedule(keys) } // trie returns the trie matching the root hash, or nil if the prefetcher doesn't // have it. -func (p *triePrefetcher) trie(root common.Hash) Trie { +func (p *triePrefetcher) trie(owner common.Hash, root common.Hash) Trie { // If the prefetcher is inactive, return from existing deep copies + id := p.trieID(owner, root) if p.fetches != nil { - trie := p.fetches[root] + trie := p.fetches[id] if trie == nil { p.deliveryMissMeter.Mark(1) return nil @@ -169,7 +170,7 @@ func (p *triePrefetcher) trie(root common.Hash) Trie { return p.db.CopyTrie(trie) } // Otherwise the prefetcher is active, bail if no trie was prefetched for this root - fetcher := p.fetchers[root] + fetcher := p.fetchers[id] if fetcher == nil { p.deliveryMissMeter.Mark(1) return nil @@ -188,20 +189,27 @@ func (p *triePrefetcher) trie(root common.Hash) Trie { // used marks a batch of state items used to allow creating statistics as to // how useful or wasteful the prefetcher is. -func (p *triePrefetcher) used(root common.Hash, used [][]byte) { - if fetcher := p.fetchers[root]; fetcher != nil { +func (p *triePrefetcher) used(owner common.Hash, root common.Hash, used [][]byte) { + if fetcher := p.fetchers[p.trieID(owner, root)]; fetcher != nil { fetcher.used = used } } +// trieID returns an unique trie identifier consists the trie owner and root hash. +func (p *triePrefetcher) trieID(owner common.Hash, root common.Hash) string { + return string(append(owner.Bytes(), root.Bytes()...)) +} + // subfetcher is a trie fetcher goroutine responsible for pulling entries for a // single trie. It is spawned when a new root is encountered and lives until the // main prefetcher is paused and either all requested items are processed or if // the trie being worked on is retrieved from the prefetcher. type subfetcher struct { - db Database // Database to load trie nodes through - root common.Hash // Root hash of the trie to prefetch - trie Trie // Trie being populated with nodes + db Database // Database to load trie nodes through + state common.Hash // Root hash of the state to prefetch + owner common.Hash // Owner of the trie, usually account hash + root common.Hash // Root hash of the trie to prefetch + trie Trie // Trie being populated with nodes tasks [][]byte // Items queued up for retrieval lock sync.Mutex // Lock protecting the task queue @@ -218,15 +226,17 @@ type subfetcher struct { // newSubfetcher creates a goroutine to prefetch state items belonging to a // particular root hash. -func newSubfetcher(db Database, root common.Hash) *subfetcher { +func newSubfetcher(db Database, state common.Hash, owner common.Hash, root common.Hash) *subfetcher { sf := &subfetcher{ - db: db, - root: root, - wake: make(chan struct{}, 1), - stop: make(chan struct{}), - term: make(chan struct{}), - copy: make(chan chan Trie), - seen: make(map[string]struct{}), + db: db, + state: state, + owner: owner, + root: root, + wake: make(chan struct{}, 1), + stop: make(chan struct{}), + term: make(chan struct{}), + copy: make(chan chan Trie), + seen: make(map[string]struct{}), } go sf.loop() return sf @@ -282,13 +292,21 @@ func (sf *subfetcher) loop() { defer close(sf.term) // Start by opening the trie and stop processing if it fails - trie, err := sf.db.OpenTrie(sf.root) - if err != nil { - log.Warn("Trie prefetcher failed opening trie", "root", sf.root, "err", err) - return + if sf.owner == (common.Hash{}) { + trie, err := sf.db.OpenTrie(sf.root) + if err != nil { + log.Warn("Trie prefetcher failed opening trie", "root", sf.root, "err", err) + return + } + sf.trie = trie + } else { + trie, err := sf.db.OpenStorageTrie(sf.state, sf.owner, sf.root) + if err != nil { + log.Warn("Trie prefetcher failed opening trie", "root", sf.root, "err", err) + return + } + sf.trie = trie } - sf.trie = trie - // Trie opened successfully, keep prefetching items for { select { diff --git a/core/state/trie_prefetcher_test.go b/core/state/trie_prefetcher_test.go index 35dc7a2c0d..cb0b67d7ea 100644 --- a/core/state/trie_prefetcher_test.go +++ b/core/state/trie_prefetcher_test.go @@ -47,20 +47,20 @@ func TestCopyAndClose(t *testing.T) { db := filledStateDB() prefetcher := newTriePrefetcher(db.db, db.originalRoot, "") skey := common.HexToHash("aaa") - prefetcher.prefetch(db.originalRoot, [][]byte{skey.Bytes()}) - prefetcher.prefetch(db.originalRoot, [][]byte{skey.Bytes()}) + prefetcher.prefetch(common.Hash{}, db.originalRoot, [][]byte{skey.Bytes()}) + prefetcher.prefetch(common.Hash{}, db.originalRoot, [][]byte{skey.Bytes()}) time.Sleep(1 * time.Second) - a := prefetcher.trie(db.originalRoot) - prefetcher.prefetch(db.originalRoot, [][]byte{skey.Bytes()}) - b := prefetcher.trie(db.originalRoot) + a := prefetcher.trie(common.Hash{}, db.originalRoot) + prefetcher.prefetch(common.Hash{}, db.originalRoot, [][]byte{skey.Bytes()}) + b := prefetcher.trie(common.Hash{}, db.originalRoot) cpy := prefetcher.copy() - cpy.prefetch(db.originalRoot, [][]byte{skey.Bytes()}) - cpy.prefetch(db.originalRoot, [][]byte{skey.Bytes()}) - c := cpy.trie(db.originalRoot) + cpy.prefetch(common.Hash{}, db.originalRoot, [][]byte{skey.Bytes()}) + cpy.prefetch(common.Hash{}, db.originalRoot, [][]byte{skey.Bytes()}) + c := cpy.trie(common.Hash{}, db.originalRoot) prefetcher.close() cpy2 := cpy.copy() - cpy2.prefetch(db.originalRoot, [][]byte{skey.Bytes()}) - d := cpy2.trie(db.originalRoot) + cpy2.prefetch(common.Hash{}, db.originalRoot, [][]byte{skey.Bytes()}) + d := cpy2.trie(common.Hash{}, db.originalRoot) cpy.close() cpy2.close() if a.Hash() != b.Hash() || a.Hash() != c.Hash() || a.Hash() != d.Hash() { @@ -72,10 +72,10 @@ func TestUseAfterClose(t *testing.T) { db := filledStateDB() prefetcher := newTriePrefetcher(db.db, db.originalRoot, "") skey := common.HexToHash("aaa") - prefetcher.prefetch(db.originalRoot, [][]byte{skey.Bytes()}) - a := prefetcher.trie(db.originalRoot) + prefetcher.prefetch(common.Hash{}, db.originalRoot, [][]byte{skey.Bytes()}) + a := prefetcher.trie(common.Hash{}, db.originalRoot) prefetcher.close() - b := prefetcher.trie(db.originalRoot) + b := prefetcher.trie(common.Hash{}, db.originalRoot) if a == nil { t.Fatal("Prefetching before close should not return nil") } @@ -88,13 +88,13 @@ func TestCopyClose(t *testing.T) { db := filledStateDB() prefetcher := newTriePrefetcher(db.db, db.originalRoot, "") skey := common.HexToHash("aaa") - prefetcher.prefetch(db.originalRoot, [][]byte{skey.Bytes()}) + prefetcher.prefetch(common.Hash{}, db.originalRoot, [][]byte{skey.Bytes()}) cpy := prefetcher.copy() - a := prefetcher.trie(db.originalRoot) - b := cpy.trie(db.originalRoot) + a := prefetcher.trie(common.Hash{}, db.originalRoot) + b := cpy.trie(common.Hash{}, db.originalRoot) prefetcher.close() - c := prefetcher.trie(db.originalRoot) - d := cpy.trie(db.originalRoot) + c := prefetcher.trie(common.Hash{}, db.originalRoot) + d := cpy.trie(common.Hash{}, db.originalRoot) if a == nil { t.Fatal("Prefetching before close should not return nil") } diff --git a/core/state_processor_test.go b/core/state_processor_test.go index e92f506c7d..c4556bd622 100644 --- a/core/state_processor_test.go +++ b/core/state_processor_test.go @@ -106,8 +106,8 @@ func TestStateProcessorErrors(t *testing.T) { }, }, } - genesis = gspec.MustCommit(db) - blockchain, _ = NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + genesis = gspec.MustCommit(db, trie.NewDatabase(db, newDbConfig(rawdb.HashScheme))) + blockchain, _ = NewBlockChain(db, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) ) defer blockchain.Stop() bigNumber := new(big.Int).SetBytes(common.FromHex("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")) @@ -241,8 +241,8 @@ func TestStateProcessorErrors(t *testing.T) { }, }, } - genesis = gspec.MustCommit(db) - blockchain, _ = NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + genesis = gspec.MustCommit(db, trie.NewDatabase(db, newDbConfig(rawdb.HashScheme))) + blockchain, _ = NewBlockChain(db, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) ) defer blockchain.Stop() for i, tt := range []struct { @@ -281,8 +281,8 @@ func TestStateProcessorErrors(t *testing.T) { }, }, } - genesis = gspec.MustCommit(db) - blockchain, _ = NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + genesis = gspec.MustCommit(db, trie.NewDatabase(db, newDbConfig(rawdb.HashScheme))) + blockchain, _ = NewBlockChain(db, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) ) defer blockchain.Stop() for i, tt := range []struct { @@ -335,8 +335,8 @@ func TestStateProcessorErrors(t *testing.T) { }, }, } - genesis = gspec.MustCommit(db) - blockchain, _ = NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + genesis = gspec.MustCommit(db, trie.NewDatabase(db, newDbConfig(rawdb.HashScheme))) + blockchain, _ = NewBlockChain(db, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) tooBigInitCode = [params.MaxInitCodeSize + 1]byte{} smallInitCode = [320]byte{} ) @@ -398,8 +398,8 @@ func TestStateProcessorErrors(t *testing.T) { }, }, } - genesis = gspec.MustCommit(db) - blockchain, _ = NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + genesis = gspec.MustCommit(db, trie.NewDatabase(db, newDbConfig(rawdb.HashScheme))) + blockchain, _ = NewBlockChain(db, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) ) defer blockchain.Stop() for i, tt := range []struct { @@ -496,12 +496,13 @@ func TestBlobTxStateTransition(t *testing.T) { Alloc: GenesisAlloc{addr: {Balance: funds}}, BaseFee: big.NewInt(params.InitialBaseFee), } - genesis = gspec.MustCommit(gendb) + triedb = trie.NewDatabase(gendb, nil) + genesis = gspec.MustCommit(gendb, triedb) signer = types.LatestSigner(gspec.Config) ) gspec.Config.ConsortiumV2Block = common.Big0 gspec.Config.RoninTreasuryAddress = roninTreasuryAddress - chain, _ := NewBlockChain(gendb, nil, params.TestChainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + chain, _ := NewBlockChain(gendb, nil, gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) blocks, _ := GenerateChain(gspec.Config, genesis, ethash.NewFaker(), gendb, 1, func(i int, block *BlockGen) { blobHashes := make([]common.Hash, nBlobs) for i := 0; i < nBlobs; i++ { @@ -574,13 +575,14 @@ func TestBaseFee(t *testing.T) { Alloc: GenesisAlloc{addr: {Balance: big.NewInt(int64(initialFund))}}, BaseFee: big.NewInt(params.InitialBaseFee), } - genesis = gspec.MustCommit(gendb) + triedb = trie.NewDatabase(gendb, nil) + genesis = gspec.MustCommit(gendb, triedb) signer = types.LatestSigner(gspec.Config) ) gspec.Config.ConsortiumV2Block = common.Big0 gspec.Config.RoninTreasuryAddress = roninTreasuryAddress gspec.Config.VenokiBlock = common.Big0 - chain, _ := NewBlockChain(gendb, nil, &chainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + chain, _ := NewBlockChain(gendb, nil, gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) blocks, _ := GenerateChain(gspec.Config, genesis, ethash.NewFaker(), gendb, 1, func(i int, block *BlockGen) { tx, _ := types.SignTx(types.NewTx(&types.LegacyTx{ To: &addr, diff --git a/core/txpool/blobpool/blobpool_test.go b/core/txpool/blobpool/blobpool_test.go index 02cb9ec6e9..1ecba877c8 100644 --- a/core/txpool/blobpool/blobpool_test.go +++ b/core/txpool/blobpool/blobpool_test.go @@ -559,7 +559,7 @@ func TestOpenDrops(t *testing.T) { statedb.AddBalance(crypto.PubkeyToAddress(overcapper.PublicKey), big.NewInt(10000000)) statedb.AddBalance(crypto.PubkeyToAddress(duplicater.PublicKey), big.NewInt(1000000)) statedb.AddBalance(crypto.PubkeyToAddress(repeater.PublicKey), big.NewInt(1000000)) - statedb.Commit(true) + statedb.Commit(0, true) chain := &testBlockChain{ config: testChainConfig, @@ -719,7 +719,7 @@ func TestOpenIndex(t *testing.T) { // Create a blob pool out of the pre-seeded data statedb, _ := state.New(types.EmptyRootHash, state.NewDatabase(rawdb.NewDatabase(memorydb.New())), nil) statedb.AddBalance(addr, big.NewInt(1_000_000_000)) - statedb.Commit(true) + statedb.Commit(0, true) chain := &testBlockChain{ config: testChainConfig, @@ -820,7 +820,7 @@ func TestOpenHeap(t *testing.T) { statedb.AddBalance(addr1, big.NewInt(1_000_000_000)) statedb.AddBalance(addr2, big.NewInt(1_000_000_000)) statedb.AddBalance(addr3, big.NewInt(1_000_000_000)) - statedb.Commit(true) + statedb.Commit(0, true) chain := &testBlockChain{ config: testChainConfig, @@ -899,7 +899,7 @@ func TestOpenCap(t *testing.T) { statedb.AddBalance(addr1, big.NewInt(1_000_000_000)) statedb.AddBalance(addr2, big.NewInt(1_000_000_000)) statedb.AddBalance(addr3, big.NewInt(1_000_000_000)) - statedb.Commit(true) + statedb.Commit(0, true) chain := &testBlockChain{ config: testChainConfig, @@ -1321,7 +1321,7 @@ func TestAdd(t *testing.T) { store.Put(blob) } } - statedb.Commit(true) + statedb.Commit(0, true) store.Close() // Create a blob pool out of the pre-seeded dats @@ -1394,7 +1394,7 @@ func benchmarkPoolPending(b *testing.B, datacap uint64) { statedb.AddBalance(addr, big.NewInt(1_000_000_000)) pool.add(tx) } - statedb.Commit(true) + statedb.Commit(0, true) defer pool.Close() // Benchmark assembling the pending diff --git a/core/types/hashes.go b/core/types/hashes.go new file mode 100644 index 0000000000..0ce1835b51 --- /dev/null +++ b/core/types/hashes.go @@ -0,0 +1,32 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package types + +import ( + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/log" +) + +// TrieRootHash returns the hash itself if it's non-empty or the predefined +// emptyHash one instead. +func TrieRootHash(hash common.Hash) common.Hash { + if hash == (common.Hash{}) { + log.Error("Zero trie root hash!") + return EmptyRootHash + } + return hash +} diff --git a/core/types/hashing_test.go b/core/types/hashing_test.go index 6d1ebf897c..b9df0c1e84 100644 --- a/core/types/hashing_test.go +++ b/core/types/hashing_test.go @@ -26,6 +26,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/rlp" @@ -38,7 +39,7 @@ func TestDeriveSha(t *testing.T) { t.Fatal(err) } for len(txs) < 1000 { - exp := types.DeriveSha(txs, new(trie.Trie)) + exp := types.DeriveSha(txs, trie.NewEmpty(trie.NewDatabase(rawdb.NewMemoryDatabase(), nil))) got := types.DeriveSha(txs, trie.NewStackTrie(nil)) if !bytes.Equal(got[:], exp[:]) { t.Fatalf("%d txs: got %x exp %x", len(txs), got, exp) @@ -85,7 +86,7 @@ func BenchmarkDeriveSha200(b *testing.B) { b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { - exp = types.DeriveSha(txs, new(trie.Trie)) + exp = types.DeriveSha(txs, trie.NewEmpty(trie.NewDatabase(rawdb.NewMemoryDatabase(), nil))) } }) @@ -106,7 +107,7 @@ func TestFuzzDeriveSha(t *testing.T) { rndSeed := mrand.Int() for i := 0; i < 10; i++ { seed := rndSeed + i - exp := types.DeriveSha(newDummy(i), new(trie.Trie)) + exp := types.DeriveSha(newDummy(i), trie.NewEmpty(trie.NewDatabase(rawdb.NewMemoryDatabase(), nil))) got := types.DeriveSha(newDummy(i), trie.NewStackTrie(nil)) if !bytes.Equal(got[:], exp[:]) { printList(newDummy(seed)) @@ -134,7 +135,7 @@ func TestDerivableList(t *testing.T) { }, } for i, tc := range tcs[1:] { - exp := types.DeriveSha(flatList(tc), new(trie.Trie)) + exp := types.DeriveSha(flatList(tc), trie.NewEmpty(trie.NewDatabase(rawdb.NewMemoryDatabase(), nil))) got := types.DeriveSha(flatList(tc), trie.NewStackTrie(nil)) if !bytes.Equal(got[:], exp[:]) { t.Fatalf("case %d: got %x exp %x", i, got, exp) diff --git a/core/types/state_account.go b/core/types/state_account.go index a80a048f16..95ee1954d2 100644 --- a/core/types/state_account.go +++ b/core/types/state_account.go @@ -17,11 +17,16 @@ package types import ( + "bytes" "math/big" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/rlp" ) +var emptyCodeHash = crypto.Keccak256(nil) + // StateAccount is the Ethereum consensus representation of accounts. // These objects are stored in the main account trie. type StateAccount struct { @@ -31,6 +36,29 @@ type StateAccount struct { CodeHash []byte } +// NewEmptyStateAccount constructs an empty state account. +func NewEmptyStateAccount() *StateAccount { + return &StateAccount{ + Balance: new(big.Int), + Root: EmptyRootHash, + CodeHash: emptyCodeHash, + } +} + +// Copy returns a deep-copied state account object. +func (acct *StateAccount) Copy() *StateAccount { + var balance *big.Int + if acct.Balance != nil { + balance = new(big.Int).Set(acct.Balance) + } + return &StateAccount{ + Nonce: acct.Nonce, + Balance: balance, + Root: acct.Root, + CodeHash: common.CopyBytes(acct.CodeHash), + } +} + type DirtyStateAccount struct { Address common.Address `json:"address"` Nonce uint64 `json:"nonce"` @@ -48,3 +76,66 @@ type DirtyStateAccountsAndBlock struct { BlockHash common.Hash DirtyAccounts []*DirtyStateAccount } + +// SlimAccount is a modified version of an Account, where the root is replaced +// with a byte slice. This format can be used to represent full-consensus format +// or slim format which replaces the empty root and code hash as nil byte slice. +type SlimAccount struct { + Nonce uint64 + Balance *big.Int + Root []byte // Nil if root equals to types.EmptyRootHash + CodeHash []byte // Nil if hash equals to types.EmptyCodeHash +} + +// SlimAccountRLP encodes the state account in 'slim RLP' format. +func SlimAccountRLP(account StateAccount) []byte { + slim := SlimAccount{ + Nonce: account.Nonce, + Balance: account.Balance, + } + if account.Root != EmptyRootHash { + slim.Root = account.Root[:] + } + if !bytes.Equal(account.CodeHash, emptyCodeHash[:]) { + slim.CodeHash = account.CodeHash + } + data, err := rlp.EncodeToBytes(slim) + if err != nil { + panic(err) + + } + return data +} + +// FullAccount decodes the data on the 'slim RLP' format and return +// the consensus format account. +func FullAccount(data []byte) (*StateAccount, error) { + var slim SlimAccount + if err := rlp.DecodeBytes(data, &slim); err != nil { + return nil, err + } + var account StateAccount + account.Nonce, account.Balance = slim.Nonce, slim.Balance + + // Interpret the storage root and code hash in slim format. + if len(slim.Root) == 0 { + account.Root = EmptyRootHash + } else { + account.Root = common.BytesToHash(slim.Root) + } + if len(slim.CodeHash) == 0 { + account.CodeHash = emptyCodeHash[:] + } else { + account.CodeHash = slim.CodeHash + } + return &account, nil +} + +// FullAccountRLP converts data on the 'slim RLP' format into the full RLP-format. +func FullAccountRLP(data []byte) ([]byte, error) { + account, err := FullAccount(data) + if err != nil { + return nil, err + } + return rlp.EncodeToBytes(account) +} diff --git a/core/vote/vote_pool_test.go b/core/vote/vote_pool_test.go index 695db84cb7..3c049d4e2c 100644 --- a/core/vote/vote_pool_test.go +++ b/core/vote/vote_pool_test.go @@ -15,6 +15,7 @@ import ( wallet "github.com/ethereum/go-ethereum/accounts/bls" "github.com/ethereum/go-ethereum/crypto/bls" + "github.com/ethereum/go-ethereum/trie" "github.com/google/uuid" keystorev4 "github.com/wealdtech/go-eth2-wallet-encryptor-keystorev4" @@ -95,12 +96,13 @@ func testVotePool(t *testing.T, isValidRules bool) { // Create a database pre-initialize with a genesis block db := rawdb.NewMemoryDatabase() - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: params.TestChainConfig, Alloc: core.GenesisAlloc{testAddr: {Balance: big.NewInt(1000000)}}, BaseFee: big.NewInt(params.InitialBaseFee), - }).MustCommit(db) - chain, _ := core.NewBlockChain(db, nil, params.TestChainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) + chain, _ := core.NewBlockChain(db, nil, gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) mux := new(event.TypeMux) mockEngine := &mockPOSA{} @@ -390,12 +392,13 @@ func TestVotePoolDosProtection(t *testing.T) { // Create a database pre-initialize with a genesis block db := rawdb.NewMemoryDatabase() - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: params.TestChainConfig, Alloc: core.GenesisAlloc{testAddr: {Balance: big.NewInt(1000000)}}, BaseFee: big.NewInt(params.InitialBaseFee), - }).MustCommit(db) - chain, _ := core.NewBlockChain(db, nil, params.TestChainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) + chain, _ := core.NewBlockChain(db, nil, gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) bs, _ := core.GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), db, 25, nil, true) if _, err := chain.InsertChain(bs[:1], nil); err != nil { @@ -516,12 +519,13 @@ func TestVotePoolWrongTargetNumber(t *testing.T) { // Create a database pre-initialize with a genesis block db := rawdb.NewMemoryDatabase() - genesis := (&core.Genesis{ + gspec := &core.Genesis{ Config: params.TestChainConfig, Alloc: core.GenesisAlloc{testAddr: {Balance: big.NewInt(1000000)}}, BaseFee: big.NewInt(params.InitialBaseFee), - }).MustCommit(db) - chain, _ := core.NewBlockChain(db, nil, params.TestChainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) + chain, _ := core.NewBlockChain(db, nil, gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) bs, _ := core.GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), db, 1, nil, true) if _, err := chain.InsertChain(bs[:1], nil); err != nil { diff --git a/docker/chainnode/entrypoint.sh b/docker/chainnode/entrypoint.sh index b1825f88fd..85f9469451 100755 --- a/docker/chainnode/entrypoint.sh +++ b/docker/chainnode/entrypoint.sh @@ -29,6 +29,7 @@ params="" syncmode="snap" mine="true" blsParams="" +state_scheme="hash" set -e @@ -48,6 +49,11 @@ if [[ ! -z $WS_PORT ]]; then ws_port="$WS_PORT" fi +if [[ ! -z $STATE_SCHEME ]]; then + state_scheme="$STATE_SCHEME" +fi + + # networkid if [[ ! -z $NETWORK_ID ]]; then case $NETWORK_ID in @@ -78,14 +84,14 @@ fi # data dir if [[ ! -d $datadir/ronin ]]; then - echo "No blockchain data, creating genesis block." - ronin init $dbEngine --datadir $datadir $genesisPath 2> /dev/null + echo "No blockchain data, creating genesis block with $genesisPath, state_scheme $state_scheme ..." + ronin init $dbEngine --datadir $datadir --state.scheme $state_scheme $genesisPath elif [[ "$FORCE_INIT" = "true" && "$INIT_FORCE_OVERRIDE_CHAIN_CONFIG" = "true" ]]; then - echo "Forcing update chain config with force overriding chain config." - ronin init $dbEngine --overrideChainConfig --datadir $datadir $genesisPath 2> /dev/null + echo "Forcing update chain config with force overriding chain config with $genesisPath, state_scheme $state_scheme ..." + ronin init $dbEngine --overrideChainConfig --datadir $datadir --state.scheme $state_scheme $genesisPath elif [ "$FORCE_INIT" = "true" ]; then - echo "Forcing update chain config." - ronin init $dbEngine --datadir $datadir $genesisPath 2> /dev/null + echo "Forcing update chain config with $genesisPath, state_scheme $state_scheme ..." + ronin init $dbEngine --datadir $datadir --state.scheme $state_scheme $genesisPath fi # password file @@ -333,6 +339,9 @@ if [[ "$BLS_SHOW_PRIVATE_KEY" = "true" ]]; then --finality.blswalletpath $BLS_PRIVATE_KEY_DIR \ --secret fi +echo "---------------------------------" +echo "Starting the Ronin Node" +echo "---------------------------------" exec ronin $params \ --syncmode $syncmode \ diff --git a/eth/api.go b/eth/api.go index 635b6fcda0..01c59e2f1e 100644 --- a/eth/api.go +++ b/eth/api.go @@ -470,7 +470,12 @@ func (api *PrivateDebugAPI) StorageRangeAt(ctx context.Context, blockHash common } func storageRangeAt(st state.Trie, start []byte, maxResult int) (StorageRangeResult, error) { - it := trie.NewIterator(st.NodeIterator(start)) + + trieIt, err := st.NodeIterator(start) + if err != nil { + return StorageRangeResult{}, err + } + it := trie.NewIterator(trieIt) result := StorageRangeResult{Storage: storageMap{}} for i := 0; i < maxResult && it.Next(); i++ { _, content, _, err := rlp.Split(it.Value) @@ -551,17 +556,25 @@ func (api *PrivateDebugAPI) getModifiedAccounts(startBlock, endBlock *types.Bloc if startBlock.Number().Uint64() >= endBlock.Number().Uint64() { return nil, fmt.Errorf("start block height (%d) must be less than end block height (%d)", startBlock.Number().Uint64(), endBlock.Number().Uint64()) } - triedb := api.eth.BlockChain().StateCache().TrieDB() + triedb := api.eth.BlockChain().TrieDB() - oldTrie, err := trie.NewSecure(startBlock.Root(), triedb) + oldTrie, err := trie.NewSecure(trie.StateTrieID(startBlock.Root()), triedb) + if err != nil { + return nil, err + } + newTrie, err := trie.NewSecure(trie.StateTrieID(endBlock.Root()), triedb) + if err != nil { + return nil, err + } + oldIt, err := oldTrie.NodeIterator([]byte{}) if err != nil { return nil, err } - newTrie, err := trie.NewSecure(endBlock.Root(), triedb) + newIt, err := newTrie.NodeIterator([]byte{}) if err != nil { return nil, err } - diff, _ := trie.NewDifferenceIterator(oldTrie.NodeIterator([]byte{}), newTrie.NodeIterator([]byte{})) + diff, _ := trie.NewDifferenceIterator(oldIt, newIt) iter := trie.NewIterator(diff) var dirty []common.Address diff --git a/eth/api_backend.go b/eth/api_backend.go index 93637e7c17..60a1869e76 100644 --- a/eth/api_backend.go +++ b/eth/api_backend.go @@ -168,7 +168,10 @@ func (b *EthAPIBackend) StateAndHeaderByNumber(ctx context.Context, number rpc.B return nil, nil, errors.New("header not found") } stateDb, err := b.eth.BlockChain().StateAt(header.Root) - return stateDb, header, err + if err != nil { + return nil, nil, err + } + return stateDb, header, nil } func (b *EthAPIBackend) StateAndHeaderByNumberOrHash(ctx context.Context, blockNrOrHash rpc.BlockNumberOrHash) (*state.StateDB, *types.Header, error) { @@ -187,7 +190,10 @@ func (b *EthAPIBackend) StateAndHeaderByNumberOrHash(ctx context.Context, blockN return nil, nil, errors.New("hash is not currently canonical") } stateDb, err := b.eth.BlockChain().StateAt(header.Root) - return stateDb, header, err + if err != nil { + return nil, nil, err + } + return stateDb, header, nil } return nil, nil, errors.New("invalid arguments; neither block nor hash specified") } @@ -417,7 +423,7 @@ func (b *EthAPIBackend) StartMining(threads int) error { } func (b *EthAPIBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive, preferDisk bool) (*state.StateDB, tracers.StateReleaseFunc, error) { - return b.eth.StateAtBlock(ctx, block, reexec, base, checkLive, preferDisk) + return b.eth.stateAtBlock(ctx, block, reexec, base, checkLive, preferDisk) } func (b *EthAPIBackend) StateAtTransaction(ctx context.Context, block *types.Block, txIndex int, reexec uint64) (core.Message, vm.BlockContext, *state.StateDB, tracers.StateReleaseFunc, error) { diff --git a/eth/api_test.go b/eth/api_test.go index 39a1d58460..01b5d1b2b3 100644 --- a/eth/api_test.go +++ b/eth/api_test.go @@ -28,7 +28,9 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/trie" ) var dumper = spew.ConfigState{Indent: " "} @@ -66,34 +68,34 @@ func TestAccountRange(t *testing.T) { t.Parallel() var ( - statedb = state.NewDatabaseWithConfig(rawdb.NewMemoryDatabase(), nil) - state, _ = state.New(common.Hash{}, statedb, nil) - addrs = [AccountRangeMaxResults * 2]common.Address{} - m = map[common.Address]bool{} + statedb = state.NewDatabaseWithConfig(rawdb.NewMemoryDatabase(), &trie.Config{Preimages: true}) + sdb, _ = state.New(types.EmptyRootHash, statedb, nil) + addrs = [AccountRangeMaxResults * 2]common.Address{} + m = map[common.Address]bool{} ) for i := range addrs { hash := common.HexToHash(fmt.Sprintf("%x", i)) addr := common.BytesToAddress(crypto.Keccak256Hash(hash.Bytes()).Bytes()) addrs[i] = addr - state.SetBalance(addrs[i], big.NewInt(1)) + sdb.SetBalance(addrs[i], big.NewInt(1)) if _, ok := m[addr]; ok { t.Fatalf("bad") } else { m[addr] = true } } - state.Commit(true) - root := state.IntermediateRoot(true) + root, _ := sdb.Commit(0, true) + sdb, _ = state.New(root, statedb, nil) trie, err := statedb.OpenTrie(root) if err != nil { t.Fatal(err) } - accountRangeTest(t, &trie, state, common.Hash{}, AccountRangeMaxResults/2, AccountRangeMaxResults/2) + accountRangeTest(t, &trie, sdb, common.Hash{}, AccountRangeMaxResults/2, AccountRangeMaxResults/2) // test pagination - firstResult := accountRangeTest(t, &trie, state, common.Hash{}, AccountRangeMaxResults, AccountRangeMaxResults) - secondResult := accountRangeTest(t, &trie, state, common.BytesToHash(firstResult.Next), AccountRangeMaxResults, AccountRangeMaxResults) + firstResult := accountRangeTest(t, &trie, sdb, common.Hash{}, AccountRangeMaxResults, AccountRangeMaxResults) + secondResult := accountRangeTest(t, &trie, sdb, common.BytesToHash(firstResult.Next), AccountRangeMaxResults, AccountRangeMaxResults) hList := make(resultHash, 0) for addr1 := range firstResult.Accounts { @@ -111,7 +113,7 @@ func TestAccountRange(t *testing.T) { // set and get an even split between the first and second sets. sort.Sort(hList) middleH := hList[AccountRangeMaxResults/2] - middleResult := accountRangeTest(t, &trie, state, middleH, AccountRangeMaxResults, AccountRangeMaxResults) + middleResult := accountRangeTest(t, &trie, sdb, middleH, AccountRangeMaxResults, AccountRangeMaxResults) missing, infirst, insecond := 0, 0, 0 for h := range middleResult.Accounts { if _, ok := firstResult.Accounts[h]; ok { @@ -140,8 +142,9 @@ func TestEmptyAccountRange(t *testing.T) { statedb = state.NewDatabase(rawdb.NewMemoryDatabase()) st, _ = state.New(common.Hash{}, statedb, nil) ) - st.Commit(true) - st.IntermediateRoot(true) + // Commit(although nothing to flush) and re-init the statedb + st.Commit(0, true) + st, _ = state.New(types.EmptyRootHash, statedb, nil) results := st.IteratorDump(&state.DumpConfig{ SkipCode: true, SkipStorage: true, diff --git a/eth/backend.go b/eth/backend.go index 52f1b0f6bf..cc6e36b331 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -140,14 +140,12 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { if err != nil { return nil, err } - chainConfig, genesisHash, genesisErr := core.SetupGenesisBlockWithOverride(chainDb, config.Genesis, config.OverrideArrowGlacier, false) - if _, ok := genesisErr.(*params.ConfigCompatError); genesisErr != nil && !ok { - return nil, genesisErr - } - log.Info("Initialised chain configuration", "config", chainConfig) - if err := pruner.RecoverPruning(stack.ResolvePath(""), chainDb, stack.ResolvePath(config.TrieCleanCacheJournal)); err != nil { - log.Error("Failed to recover state", "error", err) + // Recover the pruning data only in hash scheme + if config.StateScheme == rawdb.HashScheme { + if err := pruner.RecoverPruning(stack.ResolvePath(""), chainDb); err != nil { + log.Error("Failed to recover state", "error", err) + } } eth := &Ethereum{ config: config, @@ -167,7 +165,11 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { log.Info("Unprotected transactions allowed") } ethAPI := ethapi.NewPublicBlockChainAPI(eth.APIBackend) - eth.engine = ethconfig.CreateConsensusEngine(stack, chainConfig, ðashConfig, config.Miner.Notify, config.Miner.Noverify, + loadedChainConfig, err := core.LoadChainConfig(chainDb, config.Genesis) + if err != nil { + return nil, err + } + eth.engine = ethconfig.CreateConsensusEngine(stack, loadedChainConfig, ðashConfig, config.Miner.Notify, config.Miner.Noverify, chainDb, ethAPI, config.SyncMode) bcVersion := rawdb.ReadDatabaseVersion(chainDb) @@ -193,8 +195,6 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { } cacheConfig = &core.CacheConfig{ TrieCleanLimit: config.TrieCleanCache, - TrieCleanJournal: stack.ResolvePath(config.TrieCleanCacheJournal), - TrieCleanRejournal: config.TrieCleanCacheRejournal, TrieCleanNoPrefetch: config.NoPrefetch, TrieDirtyLimit: config.TrieDirtyCache, TrieDirtyDisabled: config.NoPruning, @@ -202,12 +202,17 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { SnapshotLimit: config.SnapshotCache, Preimages: config.Preimages, TriesInMemory: config.TriesInMemory, + StateHistory: config.StateHistory, + StateScheme: config.StateScheme, } ) - eth.blockchain, err = core.NewBlockChain(chainDb, cacheConfig, chainConfig, eth.engine, vmConfig, eth.shouldPreserve, &config.TxLookupLimit) + eth.blockchain, err = core.NewBlockChain(chainDb, cacheConfig, config.Genesis, config.OverrideArrowGlacier, eth.engine, vmConfig, eth.shouldPreserve, &config.TransactionHistory) if err != nil { return nil, err } + chainConfig := eth.blockchain.Config() + genesisHash := eth.blockchain.Genesis().Hash() + if config.EnableMonitorDoubleSign { go eth.blockchain.StartDoubleSignMonitor() } @@ -219,13 +224,6 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { } StartENRFilter(eth.blockchain, eth.p2pServer) - - // Rewind the chain in case of an incompatible config upgrade. - if compat, ok := genesisErr.(*params.ConfigCompatError); ok { - log.Warn("Rewinding chain to upgrade configuration", "err", compat) - eth.blockchain.SetHead(compat.RewindTo) - rawdb.WriteChainConfig(chainDb, genesisHash, chainConfig) - } eth.bloomIndexer.Start(eth.blockchain) if config.BlobPool.Datadir != "" { @@ -607,7 +605,7 @@ func (s *Ethereum) StartMining(threads int) error { } // If mining is started, we can disable the transaction rejection mechanism // introduced to speed sync times. - atomic.StoreUint32(&s.handler.acceptTxs, 1) + s.handler.enableSyncedFeatures() go s.miner.Start(eb) } @@ -639,7 +637,8 @@ func (s *Ethereum) Engine() consensus.Engine { return s.engine } func (s *Ethereum) ChainDb() ethdb.Database { return s.chainDb } func (s *Ethereum) IsListening() bool { return true } // Always listening func (s *Ethereum) Downloader() *downloader.Downloader { return s.handler.downloader } -func (s *Ethereum) Synced() bool { return atomic.LoadUint32(&s.handler.acceptTxs) == 1 } +func (s *Ethereum) Synced() bool { return atomic.LoadUint32(&s.handler.synced) == 1 } +func (s *Ethereum) SetSynced() { s.handler.enableSyncedFeatures() } func (s *Ethereum) ArchiveMode() bool { return s.config.NoPruning } func (s *Ethereum) BloomIndexer() *core.ChainIndexer { return s.bloomIndexer } diff --git a/eth/catalyst/api_test.go b/eth/catalyst/api_test.go index b09b3af1db..990a1d30ba 100644 --- a/eth/catalyst/api_test.go +++ b/eth/catalyst/api_test.go @@ -29,6 +29,7 @@ import ( "github.com/ethereum/go-ethereum/eth/ethconfig" "github.com/ethereum/go-ethereum/node" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) var ( @@ -55,8 +56,9 @@ func generateTestChain() (*core.Genesis, []*types.Block) { g.OffsetTime(5) g.SetExtra([]byte("test")) } - gblock := genesis.ToBlock(db) + gblock := genesis.ToBlock() engine := ethash.NewFaker() + genesis.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) blocks, _ := core.GenerateChain(config, gblock, engine, db, 10, generate, true) blocks = append([]*types.Block{gblock}, blocks...) return genesis, blocks @@ -100,7 +102,7 @@ func generateTestChainWithFork(n int, fork int) (*core.Genesis, []*types.Block, g.OffsetTime(5) g.SetExtra([]byte("testF")) } - gblock := genesis.ToBlock(db) + gblock := genesis.MustCommit(db) engine := ethash.NewFaker() blocks, _ := core.GenerateChain(config, gblock, engine, db, n, generate) blocks = append([]*types.Block{gblock}, blocks...) diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index 4623205baa..e7a9502e03 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -202,6 +202,10 @@ type BlockChain interface { // Snapshots returns the blockchain snapshot tree to paused it during sync. Snapshots() *snapshot.Tree + + // TrieDB retrieves the low level trie database used for interacting + // with the trie nodes. + TrieDB() *trie.Database } // New creates a new downloader to fetch hashes and blocks from remote peers. @@ -230,7 +234,7 @@ func New(checkpoint uint64, stateDb ethdb.Database, stateBloom *trie.SyncBloom, headerProcCh: make(chan []*types.Header, 1), quitCh: make(chan struct{}), stateCh: make(chan dataPack), - SnapSyncer: snap.NewSyncer(stateDb), + SnapSyncer: snap.NewSyncer(stateDb, chain.TrieDB().Scheme()), stateSyncStart: make(chan *stateSync), syncStatsState: stateSyncStats{ processed: rawdb.ReadFastTrieProgress(stateDb), @@ -379,6 +383,15 @@ func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int, mode // but until snap becomes prevalent, we should support both. TODO(karalabe). if mode == SnapSync { if !d.snapSync { + // Snap sync will directly modify the persistent state, making the entire + // trie database unusable until the state is fully synced. To prevent any + // subsequent state reads, explicitly disable the trie database and state + // syncer is responsible to address and correct any state missing. + if d.blockchain.TrieDB().Scheme() == rawdb.PathScheme { + if err := d.blockchain.TrieDB().Disable(); err != nil { + return err + } + } // Snap sync uses the snapshot namespace to store potentially flakey data until // sync completely heals and finishes. Pause snapshot maintenance in the mean // time to prevent access. @@ -1773,17 +1786,30 @@ func (d *Downloader) processFastSyncContent() error { // To cater for moving pivot points, track the pivot block and subsequently // accumulated download results separately. + // + // These will be nil up to the point where we reach the pivot, and will only + // be set temporarily if the synced blocks are piling up, but the pivot is + // still busy downloading. In that case, we need to occasionally check for + // pivot moves, so need to unblock the loop. These fields will accumulate + // the results in the meantime. + // + // Note, there's no issue with memory piling up since after 64 blocks the + // pivot will forcefully move so these accumulators will be dropped. var ( oldPivot *fetchResult // Locked in pivot block, might change eventually oldTail []*fetchResult // Downloaded content after the pivot ) for { - // Wait for the next batch of downloaded data to be available, and if the pivot - // block became stale, move the goalpost - results := d.queue.Results(oldPivot == nil) // Block if we're not monitoring pivot staleness + // Wait for the next batch of downloaded data to be available. If we have + // not yet reached the pivot point, wait blockingly as there's no need to + // spin-loop check for pivot moves. If we reached the pivot but have not + // yet processed it, check for results async, so we might notice pivot + // moves while state syncing. If the pivot was passed fully, block again + // as there's no more reason to check for pivot moves at all. + results := d.queue.Results(oldPivot == nil) if len(results) == 0 { // If pivot sync is done, stop - if oldPivot == nil { + if atomic.LoadInt32(&d.committed) == 1 { return sync.Cancel() } // If sync failed, stop @@ -1803,21 +1829,23 @@ func (d *Downloader) processFastSyncContent() error { pivot := d.pivotHeader d.pivotLock.RUnlock() - if oldPivot == nil { - if pivot.Root != sync.root { - sync.Cancel() - sync = d.syncState(pivot.Root) + if oldPivot == nil { // no results piling up, we can move the pivot + if atomic.LoadInt32(&d.committed) == 0 { // not yet passed the pivot, we can move the pivot + if pivot.Root != sync.root { // pivot position changed, we can move the pivot + sync.Cancel() + sync = d.syncState(pivot.Root) - go closeOnErr(sync) + go closeOnErr(sync) + } } - } else { + } else { // results already piled up, consume before handling pivot move results = append(append([]*fetchResult{oldPivot}, oldTail...), results...) } // Split around the pivot block and process the two sides via fast/full sync if atomic.LoadInt32(&d.committed) == 0 { latest := results[len(results)-1].Header // If the height is above the pivot block by 2 sets, it means the pivot - // become stale in the network and it was garbage collected, move to a + // become stale in the network, and it was garbage collected, move to a // new pivot. // // Note, we have `reorgProtHeaderDelay` number of blocks withheld, Those diff --git a/eth/downloader/downloader_test.go b/eth/downloader/downloader_test.go index 7dce33950f..8c660d0e86 100644 --- a/eth/downloader/downloader_test.go +++ b/eth/downloader/downloader_test.go @@ -48,11 +48,11 @@ func init() { // downloadTester is a test simulator for mocking out local block chain. type downloadTester struct { downloader *Downloader - - genesis *types.Block // Genesis blocks used by the tester and peers - stateDb ethdb.Database // Database used by the tester for syncing from peers - peerDb ethdb.Database // Database of the peers containing all data - peers map[string]*downloadTesterPeer + triedb *trie.Database + genesis *types.Block // Genesis blocks used by the tester and peers + stateDb ethdb.Database // Database used by the tester for syncing from peers + peerDb ethdb.Database // Database of the peers containing all data + peers map[string]*downloadTesterPeer ownHashes []common.Hash // Hash chain belonging to the tester ownHeaders map[common.Hash]*types.Header // Headers belonging to the tester @@ -88,11 +88,16 @@ func newTester() *downloadTester { } tester.stateDb = rawdb.NewMemoryDatabase() tester.stateDb.Put(testGenesis.Root().Bytes(), []byte{0x00}) + tester.triedb = trie.NewDatabase(tester.stateDb, nil) tester.downloader = New(0, tester.stateDb, trie.NewSyncBloom(1, tester.stateDb), new(event.TypeMux), tester, nil, tester.dropPeer, tester.verifyBlobHeader) return tester } +func (dl *downloadTester) TrieDB() *trie.Database { + return dl.triedb +} + // terminate aborts any operations on the embedded downloader and releases all // held resources. func (dl *downloadTester) terminate() { @@ -229,7 +234,7 @@ func (dl *downloadTester) CurrentFastBlock() *types.Block { func (dl *downloadTester) FastSyncCommitHead(hash common.Hash) error { // For now only check that the state trie is correct if block := dl.GetBlockByHash(hash); block != nil { - _, err := trie.NewSecure(block.Root(), trie.NewDatabase(dl.stateDb)) + _, err := trie.NewSecure(trie.StateTrieID(block.Root()), trie.NewDatabase(dl.stateDb, nil)) return err } return fmt.Errorf("non existent block: %x", hash[:4]) diff --git a/eth/downloader/queue_test.go b/eth/downloader/queue_test.go index 481bfe0431..7d3e9bb788 100644 --- a/eth/downloader/queue_test.go +++ b/eth/downloader/queue_test.go @@ -27,23 +27,17 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/consensus/ethash" "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/params" ) -var ( - testdb = rawdb.NewMemoryDatabase() - genesis = core.GenesisBlockForTesting(testdb, testAddress, big.NewInt(1000000000000000)) -) - // makeChain creates a chain of n blocks starting at and including parent. // the returned hash chain is ordered head->parent. In addition, every 3rd block // contains a transaction and every 5th an uncle to allow testing correct block // reassembly. func makeChain(n int, seed byte, parent *types.Block, empty bool) ([]*types.Block, []types.Receipts) { - blocks, receipts := core.GenerateChain(params.TestChainConfig, parent, ethash.NewFaker(), testdb, n, func(i int, block *core.BlockGen) { + blocks, receipts := core.GenerateChain(params.TestChainConfig, parent, ethash.NewFaker(), testDB, n, func(i int, block *core.BlockGen) { block.SetCoinbase(common.Address{seed}) // Add one tx to every secondblock if !empty && i%2 == 0 { @@ -69,10 +63,10 @@ var emptyChain *chainData func init() { // Create a chain of blocks to import targetBlocks := 128 - blocks, _ := makeChain(targetBlocks, 0, genesis, false) + blocks, _ := makeChain(targetBlocks, 0, testGenesis, false) chain = &chainData{blocks, 0} - blocks, _ = makeChain(targetBlocks, 0, genesis, true) + blocks, _ = makeChain(targetBlocks, 0, testGenesis, true) emptyChain = &chainData{blocks, 0} } @@ -261,7 +255,7 @@ func TestEmptyBlocks(t *testing.T) { // some more advanced scenarios func XTestDelivery(t *testing.T) { // the outside network, holding blocks - blo, rec := makeChain(128, 0, genesis, false) + blo, rec := makeChain(128, 0, testGenesis, false) world := newNetwork() world.receipts = rec world.chain = blo diff --git a/eth/downloader/statesync.go b/eth/downloader/statesync.go index 6c53e5577a..c50ed367b0 100644 --- a/eth/downloader/statesync.go +++ b/eth/downloader/statesync.go @@ -35,7 +35,7 @@ import ( // a single data retrieval network packet. type stateReq struct { nItems uint16 // Number of items requested for download (max is 384, so uint16 is sufficient) - trieTasks map[common.Hash]*trieTask // Trie node download tasks to track previous attempts + trieTasks map[string]*trieTask // Trie node download tasks to track previous attempts codeTasks map[common.Hash]*codeTask // Byte code download tasks to track previous attempts timeout time.Duration // Maximum round trip time for this to complete timer *time.Timer // Timer to fire when the RTT timeout expires @@ -264,7 +264,7 @@ type stateSync struct { sched *trie.Sync // State trie sync scheduler defining the tasks keccak crypto.KeccakState // Keccak256 hasher to verify deliveries with - trieTasks map[common.Hash]*trieTask // Set of trie node tasks currently queued for retrieval + trieTasks map[string]*trieTask // Set of trie node tasks currently queued for retrieval codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval numUncommitted int @@ -282,6 +282,7 @@ type stateSync struct { // trieTask represents a single trie node download task, containing a set of // peers already attempted retrieval from to detect stalled syncs and abort. type trieTask struct { + hash common.Hash path [][]byte attempts map[string]struct{} } @@ -295,12 +296,15 @@ type codeTask struct { // newStateSync creates a new state trie download scheduler. This method does not // yet start the sync. The user needs to call run to initiate. func newStateSync(d *Downloader, root common.Hash) *stateSync { + // Hack the node scheme here. It's a dead code is not used + // by light client at all. Just aim for passing tests. + scheme := trie.NewDatabase(rawdb.NewMemoryDatabase(), nil).Scheme() return &stateSync{ d: d, root: root, - sched: state.NewStateSync(root, d.stateDB, d.stateBloom, nil), + sched: state.NewStateSync(root, d.stateDB, d.stateBloom, nil, scheme), keccak: sha3.NewLegacyKeccak256().(crypto.KeccakState), - trieTasks: make(map[common.Hash]*trieTask), + trieTasks: make(map[string]*trieTask), codeTasks: make(map[common.Hash]*codeTask), deliver: make(chan *stateReq), cancel: make(chan struct{}), @@ -456,10 +460,11 @@ func (s *stateSync) assignTasks() { func (s *stateSync) fillTasks(n int, req *stateReq) (nodes []common.Hash, paths []trie.SyncPath, codes []common.Hash) { // Refill available tasks from the scheduler. if fill := n - (len(s.trieTasks) + len(s.codeTasks)); fill > 0 { - nodes, paths, codes := s.sched.Missing(fill) - for i, hash := range nodes { - s.trieTasks[hash] = &trieTask{ - path: paths[i], + paths, hashes, codes := s.sched.Missing(fill) + for i, path := range paths { + s.trieTasks[path] = &trieTask{ + hash: hashes[i], + path: trie.NewSyncPath([]byte(path)), attempts: make(map[string]struct{}), } } @@ -475,7 +480,7 @@ func (s *stateSync) fillTasks(n int, req *stateReq) (nodes []common.Hash, paths paths = make([]trie.SyncPath, 0, n) codes = make([]common.Hash, 0, n) - req.trieTasks = make(map[common.Hash]*trieTask, n) + req.trieTasks = make(map[string]*trieTask, n) req.codeTasks = make(map[common.Hash]*codeTask, n) for hash, t := range s.codeTasks { @@ -493,7 +498,7 @@ func (s *stateSync) fillTasks(n int, req *stateReq) (nodes []common.Hash, paths req.codeTasks[hash] = t delete(s.codeTasks, hash) } - for hash, t := range s.trieTasks { + for path, t := range s.trieTasks { // Stop when we've gathered enough requests if len(nodes)+len(codes) == n { break @@ -505,11 +510,11 @@ func (s *stateSync) fillTasks(n int, req *stateReq) (nodes []common.Hash, paths // Assign the request to this peer t.attempts[req.peer.id] = struct{}{} - nodes = append(nodes, hash) + nodes = append(nodes, t.hash) paths = append(paths, t.path) - req.trieTasks[hash] = t - delete(s.trieTasks, hash) + req.trieTasks[path] = t + delete(s.trieTasks, path) } req.nItems = uint16(len(nodes) + len(codes)) return nodes, paths, codes @@ -531,7 +536,7 @@ func (s *stateSync) process(req *stateReq) (int, error) { // Iterate over all the delivered data and inject one-by-one into the trie for _, blob := range req.response { - hash, err := s.processNodeData(blob) + hash, err := s.processNodeData(req.trieTasks, req.codeTasks, blob) switch err { case nil: s.numUncommitted++ @@ -544,13 +549,10 @@ func (s *stateSync) process(req *stateReq) (int, error) { default: return successful, fmt.Errorf("invalid state node %s: %v", hash.TerminalString(), err) } - // Delete from both queues (one delivery is enough for the syncer) - delete(req.trieTasks, hash) - delete(req.codeTasks, hash) } // Put unfulfilled tasks back into the retry queue npeers := s.d.peers.Len() - for hash, task := range req.trieTasks { + for path, task := range req.trieTasks { // If the node did deliver something, missing items may be due to a protocol // limit or a previous timeout + delayed delivery. Both cases should permit // the node to retry the missing items (to avoid single-peer stalls). @@ -560,10 +562,10 @@ func (s *stateSync) process(req *stateReq) (int, error) { // If we've requested the node too many times already, it may be a malicious // sync where nobody has the right data. Abort. if len(task.attempts) >= npeers { - return successful, fmt.Errorf("trie node %s failed with all peers (%d tries, %d peers)", hash.TerminalString(), len(task.attempts), npeers) + return successful, fmt.Errorf("trie node %s failed with all peers (%d tries, %d peers)", task.hash.TerminalString(), len(task.attempts), npeers) } // Missing item, place into the retry queue. - s.trieTasks[hash] = task + s.trieTasks[path] = task } for hash, task := range req.codeTasks { // If the node did deliver something, missing items may be due to a protocol @@ -586,13 +588,35 @@ func (s *stateSync) process(req *stateReq) (int, error) { // processNodeData tries to inject a trie node data blob delivered from a remote // peer into the state trie, returning whether anything useful was written or any // error occurred. -func (s *stateSync) processNodeData(blob []byte) (common.Hash, error) { - res := trie.SyncResult{Data: blob} +// +// If multiple requests correspond to the same hash, this method will inject the +// blob as a result for the first one only, leaving the remaining duplicates to +// be fetched again. +func (s *stateSync) processNodeData(nodeTasks map[string]*trieTask, codeTasks map[common.Hash]*codeTask, blob []byte) (common.Hash, error) { + var hash common.Hash s.keccak.Reset() s.keccak.Write(blob) - s.keccak.Read(res.Hash[:]) - err := s.sched.Process(res) - return res.Hash, err + s.keccak.Read(hash[:]) + + if _, present := codeTasks[hash]; present { + err := s.sched.ProcessCode(trie.CodeSyncResult{ + Hash: hash, + Data: blob, + }) + delete(codeTasks, hash) + return hash, err + } + for path, task := range nodeTasks { + if task.hash == hash { + err := s.sched.ProcessNode(trie.NodeSyncResult{ + Path: path, + Data: blob, + }) + delete(nodeTasks, path) + return hash, err + } + } + return common.Hash{}, trie.ErrNotRequested } // updateStats bumps the various state sync progress counters and displays a log diff --git a/eth/downloader/testchain_test.go b/eth/downloader/testchain_test.go index 485bbdc54a..2f081a080f 100644 --- a/eth/downloader/testchain_test.go +++ b/eth/downloader/testchain_test.go @@ -28,6 +28,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) // Test chain parameters. @@ -35,7 +36,12 @@ var ( testKey, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") testAddress = crypto.PubkeyToAddress(testKey.PublicKey) testDB = rawdb.NewMemoryDatabase() - testGenesis = core.GenesisBlockForTesting(testDB, testAddress, big.NewInt(1000000000000000)) + + testGspec = core.Genesis{ + Alloc: core.GenesisAlloc{testAddress: {Balance: big.NewInt(1000000000000000)}}, + BaseFee: big.NewInt(params.InitialBaseFee), + } + testGenesis = testGspec.MustCommit(testDB, trie.NewDatabase(testDB, nil)) ) // The common prefix of all test chains: diff --git a/eth/ethconfig/config.go b/eth/ethconfig/config.go index 5e64069127..d0e8d22093 100644 --- a/eth/ethconfig/config.go +++ b/eth/ethconfig/config.go @@ -33,6 +33,7 @@ import ( "github.com/ethereum/go-ethereum/consensus/clique" "github.com/ethereum/go-ethereum/consensus/ethash" "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/txpool/blobpool" "github.com/ethereum/go-ethereum/core/txpool/legacypool" "github.com/ethereum/go-ethereum/eth/downloader" @@ -76,17 +77,18 @@ var Defaults = Config{ DatasetsOnDisk: 2, DatasetsLockMmap: false, }, - NetworkId: 1, - TxLookupLimit: 2350000, - LightPeers: 100, - UltraLightFraction: 75, - DatabaseCache: 512, - TrieCleanCache: 154, - TrieCleanCacheJournal: "triecache", - TrieCleanCacheRejournal: 60 * time.Minute, - TrieDirtyCache: 256, - TrieTimeout: 60 * time.Minute, - SnapshotCache: 102, + NetworkId: 1, + TxLookupLimit: 2350000, + TransactionHistory: 2350000, + StateHistory: params.FullImmutabilityThreshold, + StateScheme: rawdb.HashScheme, + LightPeers: 100, + UltraLightFraction: 75, + DatabaseCache: 512, + TrieCleanCache: 154, + TrieDirtyCache: 256, + TrieTimeout: 60 * time.Minute, + SnapshotCache: 102, Miner: miner.Config{ GasCeil: 8000000, GasPrice: big.NewInt(params.GWei), @@ -143,8 +145,13 @@ type Config struct { NoPruning bool // Whether to disable pruning and flush everything to disk NoPrefetch bool // Whether to disable prefetching and only load state on demand + // Deprecated, use 'TransactionHistory' instead. TxLookupLimit uint64 `toml:",omitempty"` // The maximum number of blocks from head whose tx indices are reserved. + TransactionHistory uint64 `toml:",omitempty"` // The maximum number of blocks from head whose tx indices are reserved. + StateHistory uint64 `toml:",omitempty"` // The maximum number of blocks from head whose state histories are reserved. + StateScheme string `toml:",omitempty"` // State scheme used to store ethereum state and merkle trie nodes on top + // Whitelist of required block number -> hash values to accept Whitelist map[uint64]common.Hash `toml:"-"` diff --git a/eth/ethconfig/gen_config.go b/eth/ethconfig/gen_config.go index 6e1302bc88..3f86095dae 100644 --- a/eth/ethconfig/gen_config.go +++ b/eth/ethconfig/gen_config.go @@ -27,6 +27,9 @@ func (c Config) MarshalTOML() (interface{}, error) { NoPruning bool NoPrefetch bool TxLookupLimit uint64 `toml:",omitempty"` + TransactionHistory uint64 `toml:",omitempty"` + StateHistory uint64 `toml:",omitempty"` + StateScheme string `toml:",omitempty"` Whitelist map[uint64]common.Hash `toml:"-"` LightServ int `toml:",omitempty"` LightIngress int `toml:",omitempty"` @@ -43,8 +46,6 @@ func (c Config) MarshalTOML() (interface{}, error) { DatabaseCache int DatabaseFreezer string TrieCleanCache int - TrieCleanCacheJournal string `toml:",omitempty"` - TrieCleanCacheRejournal time.Duration `toml:",omitempty"` TrieDirtyCache int TrieTimeout time.Duration SnapshotCache int @@ -71,6 +72,9 @@ func (c Config) MarshalTOML() (interface{}, error) { enc.NoPruning = c.NoPruning enc.NoPrefetch = c.NoPrefetch enc.TxLookupLimit = c.TxLookupLimit + enc.TransactionHistory = c.TransactionHistory + enc.StateHistory = c.StateHistory + enc.StateScheme = c.StateScheme enc.Whitelist = c.Whitelist enc.LightServ = c.LightServ enc.LightIngress = c.LightIngress @@ -87,8 +91,6 @@ func (c Config) MarshalTOML() (interface{}, error) { enc.DatabaseCache = c.DatabaseCache enc.DatabaseFreezer = c.DatabaseFreezer enc.TrieCleanCache = c.TrieCleanCache - enc.TrieCleanCacheJournal = c.TrieCleanCacheJournal - enc.TrieCleanCacheRejournal = c.TrieCleanCacheRejournal enc.TrieDirtyCache = c.TrieDirtyCache enc.TrieTimeout = c.TrieTimeout enc.SnapshotCache = c.SnapshotCache @@ -119,6 +121,9 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error { NoPruning *bool NoPrefetch *bool TxLookupLimit *uint64 `toml:",omitempty"` + TransactionHistory *uint64 `toml:",omitempty"` + StateHistory *uint64 `toml:",omitempty"` + StateScheme *string `toml:",omitempty"` Whitelist map[uint64]common.Hash `toml:"-"` LightServ *int `toml:",omitempty"` LightIngress *int `toml:",omitempty"` @@ -135,8 +140,6 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error { DatabaseCache *int DatabaseFreezer *string TrieCleanCache *int - TrieCleanCacheJournal *string `toml:",omitempty"` - TrieCleanCacheRejournal *time.Duration `toml:",omitempty"` TrieDirtyCache *int TrieTimeout *time.Duration SnapshotCache *int @@ -182,6 +185,15 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error { if dec.TxLookupLimit != nil { c.TxLookupLimit = *dec.TxLookupLimit } + if dec.TransactionHistory != nil { + c.TransactionHistory = *dec.TransactionHistory + } + if dec.StateHistory != nil { + c.StateHistory = *dec.StateHistory + } + if dec.StateScheme != nil { + c.StateScheme = *dec.StateScheme + } if dec.Whitelist != nil { c.Whitelist = dec.Whitelist } @@ -230,12 +242,6 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error { if dec.TrieCleanCache != nil { c.TrieCleanCache = *dec.TrieCleanCache } - if dec.TrieCleanCacheJournal != nil { - c.TrieCleanCacheJournal = *dec.TrieCleanCacheJournal - } - if dec.TrieCleanCacheRejournal != nil { - c.TrieCleanCacheRejournal = *dec.TrieCleanCacheRejournal - } if dec.TrieDirtyCache != nil { c.TrieDirtyCache = *dec.TrieDirtyCache } diff --git a/eth/fetcher/block_fetcher_test.go b/eth/fetcher/block_fetcher_test.go index 29b3b03788..bfe5269b11 100644 --- a/eth/fetcher/block_fetcher_test.go +++ b/eth/fetcher/block_fetcher_test.go @@ -35,10 +35,14 @@ import ( ) var ( - testdb = rawdb.NewMemoryDatabase() - testKey, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") - testAddress = crypto.PubkeyToAddress(testKey.PublicKey) - genesis = core.GenesisBlockForTesting(testdb, testAddress, big.NewInt(1000000000000000)) + testdb = rawdb.NewMemoryDatabase() + testKey, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") + testAddress = crypto.PubkeyToAddress(testKey.PublicKey) + gspec = core.Genesis{ + Alloc: core.GenesisAlloc{testAddress: {Balance: big.NewInt(1000000000000000)}}, + BaseFee: big.NewInt(params.InitialBaseFee), + } + genesis = gspec.MustCommit(testdb, trie.NewDatabase(testdb, nil)) unknownBlock = types.NewBlock(&types.Header{GasLimit: params.GenesisGasLimit, BaseFee: big.NewInt(params.InitialBaseFee)}, nil, nil, nil, trie.NewStackTrie(nil)) ) diff --git a/eth/filters/filter_system_test.go b/eth/filters/filter_system_test.go index d30c9f9592..9aa34d9a98 100644 --- a/eth/filters/filter_system_test.go +++ b/eth/filters/filter_system_test.go @@ -38,6 +38,7 @@ import ( "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rpc" + "github.com/ethereum/go-ethereum/trie" ) var ( @@ -164,7 +165,7 @@ func TestFinalizedBlockSubscription(t *testing.T) { db = rawdb.NewMemoryDatabase() backend = &testBackend{db: db} api = NewPublicFilterAPI(backend, false, deadline) - genesis = (&core.Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + genesis = (&core.Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db, trie.NewDatabase(db, nil)) chain, _ = core.GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), db, 10, func(i int, gen *core.BlockGen) {}, true) chainEvents = []core.ChainEvent{} ) @@ -225,7 +226,7 @@ func TestBlockSubscription(t *testing.T) { db = rawdb.NewMemoryDatabase() backend = &testBackend{db: db} api = NewPublicFilterAPI(backend, false, deadline) - genesis = (&core.Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db) + genesis = (&core.Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db, trie.NewDatabase(db, nil)) chain, _ = core.GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), db, 10, func(i int, gen *core.BlockGen) {}, true) chainEvents = []core.ChainEvent{} ) diff --git a/eth/filters/filter_test.go b/eth/filters/filter_test.go index c451604a4e..4cd9961006 100644 --- a/eth/filters/filter_test.go +++ b/eth/filters/filter_test.go @@ -30,6 +30,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) func makeReceipt(addr common.Address) *types.Receipt { @@ -56,10 +57,17 @@ func BenchmarkFilters(b *testing.B) { addr2 = common.BytesToAddress([]byte("jeff")) addr3 = common.BytesToAddress([]byte("ethereum")) addr4 = common.BytesToAddress([]byte("random addresses please")) + + gspec = core.Genesis{ + Alloc: core.GenesisAlloc{addr1: {Balance: big.NewInt(1000000)}}, + BaseFee: big.NewInt(params.InitialBaseFee), + } + genesis = gspec.ToBlock() ) defer db.Close() - genesis := core.GenesisBlockForTesting(db, addr1, big.NewInt(1000000)) + gspec.MustCommit(db, trie.NewDatabase(db, nil)) + chain, receipts := core.GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), db, 100010, func(i int, gen *core.BlockGen) { switch i { case 2403: @@ -116,10 +124,17 @@ func TestFilters(t *testing.T) { hash2 = common.BytesToHash([]byte("topic2")) hash3 = common.BytesToHash([]byte("topic3")) hash4 = common.BytesToHash([]byte("topic4")) + + gspec = core.Genesis{ + Alloc: core.GenesisAlloc{addr: {Balance: big.NewInt(1000000)}}, + BaseFee: big.NewInt(params.InitialBaseFee), + } + genesis = gspec.ToBlock() ) defer db.Close() - genesis := core.GenesisBlockForTesting(db, addr, big.NewInt(1000000)) + gspec.MustCommit(db, trie.NewDatabase(db, nil)) + chain, receipts := core.GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), db, 1000, func(i int, gen *core.BlockGen) { switch i { case 1: diff --git a/eth/gasprice/gasprice_test.go b/eth/gasprice/gasprice_test.go index 7438321e2a..cc3ca5a04e 100644 --- a/eth/gasprice/gasprice_test.go +++ b/eth/gasprice/gasprice_test.go @@ -32,6 +32,7 @@ import ( "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rpc" + "github.com/ethereum/go-ethereum/trie" ) const testHead = 32 @@ -110,10 +111,8 @@ func newTestBackend(t *testing.T, londonBlock *big.Int, pending bool) *testBacke config.ArrowGlacierBlock = londonBlock engine := ethash.NewFaker() db := rawdb.NewMemoryDatabase() - genesis, err := gspec.Commit(db) - if err != nil { - t.Fatal(err) - } + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) + // Generate testing blocks blocks, _ := core.GenerateChain(gspec.Config, genesis, engine, db, testHead+1, func(i int, b *core.BlockGen) { b.SetCoinbase(common.Address{1}) @@ -143,8 +142,8 @@ func newTestBackend(t *testing.T, londonBlock *big.Int, pending bool) *testBacke }, true) // Construct testing chain diskdb := rawdb.NewMemoryDatabase() - gspec.Commit(diskdb) - chain, err := core.NewBlockChain(diskdb, &core.CacheConfig{TrieCleanNoPrefetch: true}, &config, engine, vm.Config{}, nil, nil) + gspec.MustCommit(diskdb, trie.NewDatabase(diskdb, nil)) + chain, err := core.NewBlockChain(diskdb, &core.CacheConfig{TrieCleanNoPrefetch: true}, gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create local chain, %v", err) } diff --git a/eth/handler.go b/eth/handler.go index 29efa79309..0c0e5f276a 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -27,6 +27,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/forkid" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/txpool" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vote" @@ -44,6 +45,7 @@ import ( "github.com/ethereum/go-ethereum/p2p/enode" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" "golang.org/x/crypto/sha3" ) @@ -107,9 +109,9 @@ type handler struct { networkID uint64 forkFilter forkid.Filter // Fork ID filter, constant across the lifetime of the node - fastSync uint32 // Flag whether fast sync is enabled (gets disabled if we already have blocks) - snapSync uint32 // Flag whether fast sync should operate on top of the snap protocol - acceptTxs uint32 // Flag whether we're considered synchronised (enables transaction processing) + fastSync uint32 // Flag whether fast sync is enabled (gets disabled if we already have blocks) + snapSync uint32 // Flag whether fast sync should operate on top of the snap protocol + synced uint32 // Flag whether we're considered synchronised (enables transaction processing) checkpointNumber uint64 // Block number for the sync progress validator to cross reference checkpointHash common.Hash // Block hash for the sync progress validator to cross reference @@ -181,17 +183,23 @@ func newHandler(config *handlerConfig) (*handler, error) { fullBlock, fastBlock := h.chain.CurrentBlock(), h.chain.CurrentFastBlock() if fullBlock.NumberU64() == 0 && fastBlock.NumberU64() > 0 { h.fastSync = uint32(1) - log.Warn("Switch sync mode from full sync to fast sync") + log.Warn("Switch sync mode from full sync to fast sync", "reason", "snap sync incomplete") + } else if !h.chain.HasState(fullBlock.Root()) { + h.fastSync = uint32(1) + log.Warn("Switch sync mode from full sync to snap sync", "reason", "head state missing") } } else { - if h.chain.CurrentBlock().NumberU64() > 0 { + head := h.chain.CurrentBlock() + if head.NumberU64() > 0 { // Print warning log if database is not empty to run fast sync. log.Warn("Switch sync mode from fast sync to full sync") } else { // If fast sync was requested and our database is empty, grant it h.fastSync = uint32(1) + log.Info("Enabled fast sync", "head", head.Number, "hash", head.Hash()) if config.Sync == downloader.SnapSync { h.snapSync = uint32(1) + log.Info("Enabled snap sync", "head", head.Number, "hash", head.Hash()) } } } @@ -235,15 +243,11 @@ func newHandler(config *handlerConfig) (*handler, error) { // accept each others' blocks until a restart. Unfortunately we haven't figured // out a way yet where nodes can decide unilaterally whether the network is new // or not. This should be fixed if we figure out a solution. - if atomic.LoadUint32(&h.fastSync) == 1 { - log.Warn("Fast syncing, discarded propagated block", "number", blocks[0].Number(), "hash", blocks[0].Hash()) + if atomic.LoadUint32(&h.synced) == 0 { + log.Warn("Syncing, discarded propagated block", "number", blocks[0].Number(), "hash", blocks[0].Hash()) return 0, nil } - n, err := h.chain.InsertChain(blocks, sidecars) - if err == nil { - atomic.StoreUint32(&h.acceptTxs, 1) // Mark initial sync done on any fetcher import - } - return n, err + return h.chain.InsertChain(blocks, sidecars) } h.blockFetcher = fetcher.NewBlockFetcher(false, nil, h.chain.GetBlockByHash, validator, h.chain.Engine().VerifyBlobHeader, h.BroadcastBlock, heighter, nil, inserter, h.removePeer) @@ -700,3 +704,23 @@ func (h *handler) voteBroadcastLoop() { } } } + +// enableSyncedFeatures enables the post-sync functionalities when the initial +// sync is finished. +func (h *handler) enableSyncedFeatures() { + atomic.StoreUint32(&h.synced, 1) + + // If we were running fast/snap sync and it finished, disable doing another + // round on next sync cycle + if atomic.LoadUint32(&h.fastSync) == 1 { + log.Info("Fast sync complete, auto disabling") + atomic.StoreUint32(&h.fastSync, 0) + } + if atomic.LoadUint32(&h.snapSync) == 1 { + log.Info("Snap sync complete, auto disabling") + atomic.StoreUint32(&h.snapSync, 0) + } + if h.chain.TrieDB().Scheme() == rawdb.PathScheme { + h.chain.TrieDB().SetBufferSize(pathdb.DefaultBufferSize) + } +} diff --git a/eth/handler_eth.go b/eth/handler_eth.go index cfad1fdfa1..6be4a5cd7b 100644 --- a/eth/handler_eth.go +++ b/eth/handler_eth.go @@ -56,7 +56,7 @@ func (h *ethHandler) PeerInfo(id enode.ID) interface{} { // AcceptTxs retrieves whether transaction processing is enabled on the node // or if inbound transactions should simply be dropped. func (h *ethHandler) AcceptTxs() bool { - return atomic.LoadUint32(&h.acceptTxs) == 1 + return atomic.LoadUint32(&h.synced) == 1 } // Handle is invoked from a peer's message handler when it receives a new remote diff --git a/eth/handler_eth_test.go b/eth/handler_eth_test.go index c4f7ca1c8d..0661825ae1 100644 --- a/eth/handler_eth_test.go +++ b/eth/handler_eth_test.go @@ -117,11 +117,11 @@ func testForkIDSplit(t *testing.T, protocol uint) { gspecNoFork = &core.Genesis{Config: configNoFork} gspecProFork = &core.Genesis{Config: configProFork} - genesisNoFork = gspecNoFork.MustCommit(dbNoFork) - genesisProFork = gspecProFork.MustCommit(doFork) + genesisNoFork = gspecNoFork.MustCommit(dbNoFork, trie.NewDatabase(dbNoFork, nil)) + genesisProFork = gspecProFork.MustCommit(doFork, trie.NewDatabase(doFork, nil)) - chainNoFork, _ = core.NewBlockChain(dbNoFork, nil, configNoFork, engine, vm.Config{}, nil, nil) - chainProFork, _ = core.NewBlockChain(doFork, nil, configProFork, engine, vm.Config{}, nil, nil) + chainNoFork, _ = core.NewBlockChain(dbNoFork, nil, gspecNoFork, nil, engine, vm.Config{}, nil, nil) + chainProFork, _ = core.NewBlockChain(doFork, nil, gspecProFork, nil, engine, vm.Config{}, nil, nil) blocksNoFork, _ = core.GenerateChain(configNoFork, genesisNoFork, engine, dbNoFork, 2, nil, true) blocksProFork, _ = core.GenerateChain(configProFork, genesisProFork, engine, doFork, 2, nil, true) @@ -259,7 +259,7 @@ func testRecvTransactions(t *testing.T, protocol uint) { handler := newTestHandler() defer handler.close() - handler.handler.acceptTxs = 1 // mark synced to accept transactions + handler.handler.synced = 1 // mark synced to accept transactions txs := make(chan core.NewTxsEvent) sub := handler.txpool.SubscribeTransactions(txs, false) @@ -408,7 +408,7 @@ func testTransactionPropagation(t *testing.T, protocol uint) { sinks[i] = newTestHandler() defer sinks[i].close() - sinks[i].handler.acceptTxs = 1 // mark synced to accept transactions + sinks[i].handler.synced = 1 // mark synced to accept transactions } // Interconnect all the sink handlers with the source handler for i, sink := range sinks { diff --git a/eth/handler_test.go b/eth/handler_test.go index b9f6e3928c..58dc896a37 100644 --- a/eth/handler_test.go +++ b/eth/handler_test.go @@ -37,6 +37,7 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" "github.com/holiman/uint256" ) @@ -143,12 +144,13 @@ func newTestHandler() *testHandler { func newTestHandlerWithBlocks(blocks int) *testHandler { // Create a database pre-initialize with a genesis block db := rawdb.NewMemoryDatabase() - (&core.Genesis{ + gspec := &core.Genesis{ Config: params.TestChainConfig, Alloc: core.GenesisAlloc{testAddr: {Balance: big.NewInt(1000000)}}, - }).MustCommit(db) + } + gspec.MustCommit(db, trie.NewDatabase(db, nil)) - chain, _ := core.NewBlockChain(db, nil, params.TestChainConfig, ethash.NewFaker(), vm.Config{}, nil, nil) + chain, _ := core.NewBlockChain(db, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) bs, _ := core.GenerateChain(params.TestChainConfig, chain.Genesis(), ethash.NewFaker(), db, blocks, nil, true) if _, err := chain.InsertChain(bs, nil); err != nil { @@ -221,8 +223,8 @@ func newTestHandlerWithBlocks100(blocks int) (*testHandler, []*types.BlobTxSidec }, }, } - gspec.MustCommit(db) - chain, err := core.NewBlockChain(db, nil, &chainConfig, engine, vm.Config{}, nil, nil) + gspec.MustCommit(db, trie.NewDatabase(db, nil)) + chain, err := core.NewBlockChain(db, nil, gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { panic(err) } diff --git a/eth/protocols/eth/handler_test.go b/eth/protocols/eth/handler_test.go index d6cc356587..56f465774e 100644 --- a/eth/protocols/eth/handler_test.go +++ b/eth/protocols/eth/handler_test.go @@ -66,12 +66,13 @@ func newTestBackend(blocks int) *testBackend { func newTestBackendWithGenerator(blocks int, generator func(int, *core.BlockGen)) *testBackend { // Create a database pre-initialize with a genesis block db := rawdb.NewMemoryDatabase() - (&core.Genesis{ + gspec := &core.Genesis{ Config: params.TestChainConfig, Alloc: core.GenesisAlloc{testAddr: {Balance: big.NewInt(100_000_000_000_000_000)}}, - }).MustCommit(db) + } + gspec.MustCommit(db, trie.NewDatabase(db, nil)) - chain, _ := core.NewBlockChain(db, nil, params.TestChainConfig, ethash.NewFaker(), vm.Config{}, nil, nil) + chain, _ := core.NewBlockChain(db, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) bs, _ := core.GenerateChain(params.TestChainConfig, chain.Genesis(), ethash.NewFaker(), db, blocks, generator, true) if _, err := chain.InsertChain(bs, nil); err != nil { @@ -501,7 +502,7 @@ func testGetNodeData(t *testing.T, protocol uint, drop bool) { // Reconstruct state tree from the received data. reconstructDB := rawdb.NewMemoryDatabase() for i := 0; i < len(data); i++ { - rawdb.WriteTrieNode(reconstructDB, hashes[i], data[i]) + rawdb.WriteLegacyTrieNode(reconstructDB, hashes[i], data[i]) } // Sanity check whether all state matches. diff --git a/eth/protocols/eth/handlers.go b/eth/protocols/eth/handlers.go index 8444f56be3..95743577fb 100644 --- a/eth/protocols/eth/handlers.go +++ b/eth/protocols/eth/handlers.go @@ -21,6 +21,7 @@ import ( "fmt" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" @@ -202,6 +203,10 @@ func handleGetNodeData66(backend Backend, msg Decoder, peer *Peer) error { } func answerGetNodeDataQuery(backend Backend, query GetNodeDataPacket, peer *Peer) [][]byte { + // Request nodes by hash is not supported in path-based scheme. + if backend.Chain().TrieDB().Scheme() == rawdb.PathScheme { + return nil + } // Gather state data until the fetch or network limits is reached var ( bytes int @@ -217,7 +222,7 @@ func answerGetNodeDataQuery(backend Backend, query GetNodeDataPacket, peer *Peer // Only lookup the trie node if there's chance that we actually have it continue } - entry, err := backend.Chain().TrieNode(hash) + entry, err := backend.Chain().TrieDB().Node(hash) if len(entry) == 0 || err != nil { // Read the contract code with prefix only to save unnecessary lookups. entry, err = backend.Chain().ContractCodeWithPrefix(hash) diff --git a/eth/protocols/snap/handler.go b/eth/protocols/snap/handler.go index c62f9cfca5..0cc6f0d1de 100644 --- a/eth/protocols/snap/handler.go +++ b/eth/protocols/snap/handler.go @@ -165,7 +165,7 @@ func handleMessage(backend Backend, peer *Peer) error { req.Bytes = softResponseLimit } // Retrieve the requested state and bail out if non existent - tr, err := trie.New(req.Root, backend.Chain().StateCache().TrieDB()) + tr, err := trie.New(trie.StateTrieID(req.Root), backend.Chain().TrieDB()) if err != nil { return p2p.Send(peer.rw, AccountRangeMsg, &AccountRangePacket{ID: req.ID}) } @@ -315,7 +315,7 @@ func handleMessage(backend Backend, peer *Peer) error { if origin != (common.Hash{}) || abort { // Request started at a non-zero hash or was capped prematurely, add // the endpoint Merkle proofs - accTrie, err := trie.New(req.Root, backend.Chain().StateCache().TrieDB()) + accTrie, err := trie.New(trie.StateTrieID(req.Root), backend.Chain().TrieDB()) if err != nil { return p2p.Send(peer.rw, StorageRangesMsg, &StorageRangesPacket{ID: req.ID}) } @@ -323,7 +323,7 @@ func handleMessage(backend Backend, peer *Peer) error { if err := rlp.DecodeBytes(accTrie.Get(account[:]), &acc); err != nil { return p2p.Send(peer.rw, StorageRangesMsg, &StorageRangesPacket{ID: req.ID}) } - stTrie, err := trie.New(acc.Root, backend.Chain().StateCache().TrieDB()) + stTrie, err := trie.New(trie.StorageTrieID(req.Root, account, acc.Root), backend.Chain().TrieDB()) if err != nil { return p2p.Send(peer.rw, StorageRangesMsg, &StorageRangesPacket{ID: req.ID}) } @@ -428,9 +428,9 @@ func handleMessage(backend Backend, peer *Peer) error { req.Bytes = softResponseLimit } // Make sure we have the state associated with the request - triedb := backend.Chain().StateCache().TrieDB() + triedb := backend.Chain().TrieDB() - accTrie, err := trie.NewSecure(req.Root, triedb) + accTrie, err := trie.NewSecure(trie.StateTrieID(req.Root), triedb) if err != nil { // We don't have the requested state available, bail out return p2p.Send(peer.rw, TrieNodesMsg, &TrieNodesPacket{ID: req.ID}) @@ -472,7 +472,7 @@ func handleMessage(backend Backend, peer *Peer) error { if err != nil || account == nil { break } - stTrie, err := trie.NewSecure(common.BytesToHash(account.Root), triedb) + stTrie, err := trie.NewSecure(trie.StorageTrieID(req.Root, common.BytesToHash(pathset[0]), common.BytesToHash(account.Root)), triedb) loads++ // always account database reads, even for failures if err != nil { break diff --git a/eth/protocols/snap/metrics.go b/eth/protocols/snap/metrics.go index a8ea143b54..ffc9a6a5f3 100644 --- a/eth/protocols/snap/metrics.go +++ b/eth/protocols/snap/metrics.go @@ -8,4 +8,32 @@ var ( IngressRegistrationErrorMeter = metrics.NewRegisteredMeter(ingressRegistrationErrorName, nil) EgressRegistrationErrorMeter = metrics.NewRegisteredMeter(egressRegistrationErrorName, nil) + + // deletionGauge is the metric to track how many trie node deletions + // are performed in total during the sync process. + deletionGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete", nil) + + // lookupGauge is the metric to track how many trie node lookups are + // performed to determine if node needs to be deleted. + lookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/lookup", nil) + + // boundaryAccountNodesGauge is the metric to track how many boundary trie + // nodes in account trie are met. + boundaryAccountNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/account", nil) + + // boundaryAccountNodesGauge is the metric to track how many boundary trie + // nodes in storage tries are met. + boundaryStorageNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/storage", nil) + + // smallStorageGauge is the metric to track how many storages are small enough + // to retrieved in one or two request. + smallStorageGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/small", nil) + + // largeStorageGauge is the metric to track how many storages are large enough + // to retrieved concurrently. + largeStorageGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/large", nil) + + // skipStorageHealingGauge is the metric to track how many storages are retrieved + // in multiple requests but healing is not necessary. + skipStorageHealingGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/noheal", nil) ) diff --git a/eth/protocols/snap/protocol.go b/eth/protocols/snap/protocol.go index 5528e9212e..57deb54772 100644 --- a/eth/protocols/snap/protocol.go +++ b/eth/protocols/snap/protocol.go @@ -21,7 +21,7 @@ import ( "fmt" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/state/snapshot" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/rlp" ) @@ -104,7 +104,7 @@ func (p *AccountRangePacket) Unpack() ([]common.Hash, [][]byte, error) { accounts = make([][]byte, len(p.Accounts)) ) for i, acc := range p.Accounts { - val, err := snapshot.FullAccountRLP(acc.Body) + val, err := types.FullAccountRLP(acc.Body) if err != nil { return nil, nil, fmt.Errorf("invalid account %x: %v", acc.Body, err) } diff --git a/eth/protocols/snap/sort_test.go b/eth/protocols/snap/sort_test.go index c625be09ea..be0a8c5706 100644 --- a/eth/protocols/snap/sort_test.go +++ b/eth/protocols/snap/sort_test.go @@ -22,7 +22,6 @@ import ( "testing" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/trie" ) func hexToNibbles(s string) []byte { @@ -38,22 +37,17 @@ func hexToNibbles(s string) []byte { } func TestRequestSorting(t *testing.T) { - // - Path 0x9 -> {0x19} // - Path 0x99 -> {0x0099} // - Path 0x01234567890123456789012345678901012345678901234567890123456789019 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19} // - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099} - var f = func(path string) (trie.SyncPath, TrieNodePathSet, common.Hash) { + var f = func(path string) string { data := hexToNibbles(path) - sp := trie.NewSyncPath(data) - tnps := TrieNodePathSet([][]byte(sp)) - hash := common.Hash{} - return sp, tnps, hash + return string(data) } var ( - hashes []common.Hash - paths []trie.SyncPath - pathsets []TrieNodePathSet + hashes []common.Hash + paths []string ) for _, x := range []string{ "0x9", @@ -67,16 +61,14 @@ func TestRequestSorting(t *testing.T) { "0x01234567890123456789012345678901012345678901234567890123456789010", "0x01234567890123456789012345678901012345678901234567890123456789011", } { - sp, tnps, hash := f(x) - hashes = append(hashes, hash) - paths = append(paths, sp) - pathsets = append(pathsets, tnps) + paths = append(paths, f(x)) + hashes = append(hashes, common.Hash{}) } - _, paths, pathsets = sortByAccountPath(hashes, paths) + _, _, syncPaths, pathsets := sortByAccountPath(paths, hashes) { var b = new(bytes.Buffer) - for i := 0; i < len(paths); i++ { - fmt.Fprintf(b, "\n%d. paths %x", i, paths[i]) + for i := 0; i < len(syncPaths); i++ { + fmt.Fprintf(b, "\n%d. paths %x", i, syncPaths[i]) } want := ` 0. paths [0099] diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 9e82682fb9..e1d9406f21 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -33,7 +33,6 @@ import ( "github.com/ethereum/go-ethereum/common/math" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" - "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" @@ -255,8 +254,8 @@ type trienodeHealRequest struct { timeout *time.Timer // Timer to track delivery timeout stale chan struct{} // Channel to signal the request was dropped - hashes []common.Hash // Trie node hashes to validate responses - paths []trie.SyncPath // Trie node paths requested for rescheduling + paths []string // Trie node paths for identifying trie node + hashes []common.Hash // Trie node hashes to validate responses task *healTask // Task which this request is filling (only access fields through the runloop!!) } @@ -265,9 +264,9 @@ type trienodeHealRequest struct { type trienodeHealResponse struct { task *healTask // Task which this request is filling - hashes []common.Hash // Hashes of the trie nodes to avoid double hashing - paths []trie.SyncPath // Trie node paths requested for rescheduling missing ones - nodes [][]byte // Actual trie nodes to store into the database (nil = missing) + paths []string // Paths of the trie nodes + hashes []common.Hash // Hashes of the trie nodes to avoid double hashing + nodes [][]byte // Actual trie nodes to store into the database (nil = missing) } // bytecodeHealRequest tracks a pending bytecode request to ensure responses are to @@ -346,8 +345,8 @@ type storageTask struct { type healTask struct { scheduler *trie.Sync // State trie sync scheduler defining the tasks - trieTasks map[common.Hash]trie.SyncPath // Set of trie node tasks currently queued for retrieval - codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval + trieTasks map[string]common.Hash // Set of trie node tasks currently queued for retrieval, indexed by node path + codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by code hash } // syncProgress is a database entry to allow suspending and resuming a snapshot state @@ -414,7 +413,8 @@ type SyncPeer interface { // - The peer delivers a stale response after a previous timeout // - The peer delivers a refusal to serve the requested state type Syncer struct { - db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup) + db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup) + scheme string // Node scheme used in node database root common.Hash // Current state trie root being synced tasks []*accountTask // Current account task set being synced @@ -480,10 +480,10 @@ type Syncer struct { // NewSyncer creates a new snapshot syncer to download the Ethereum state over the // snap protocol. -func NewSyncer(db ethdb.KeyValueStore) *Syncer { +func NewSyncer(db ethdb.KeyValueStore, scheme string) *Syncer { return &Syncer{ - db: db, - + db: db, + scheme: scheme, peers: make(map[string]SyncPeer), peerJoin: new(event.Feed), peerDrop: new(event.Feed), @@ -564,7 +564,7 @@ func (s *Syncer) Unregister(id string) error { return nil } -// Sync starts (or resumes a previous) sync cycle to iterate over an state trie +// Sync starts (or resumes a previous) sync cycle to iterate over a state trie // with the given root and reconstruct the nodes based on the snapshot leaves. // Previously downloaded segments will not be redownloaded of fixed, rather any // errors will be healed after the leaves are fully accumulated. @@ -574,8 +574,8 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { s.lock.Lock() s.root = root s.healer = &healTask{ - scheduler: state.NewStateSync(root, s.db, nil, s.onHealState), - trieTasks: make(map[common.Hash]trie.SyncPath), + scheduler: state.NewStateSync(root, s.db, nil, s.onHealState, s.scheme), + trieTasks: make(map[string]common.Hash), codeTasks: make(map[common.Hash]struct{}), } s.statelessPeers = make(map[string]struct{}) @@ -699,6 +699,19 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { } } +// cleanPath is used to remove the dangling nodes in the stackTrie. +func (s *Syncer) cleanPath(batch ethdb.Batch, owner common.Hash, path []byte) { + if owner == (common.Hash{}) && rawdb.ExistsAccountTrieNode(s.db, path) { + rawdb.DeleteAccountTrieNode(batch, path) + deletionGauge.Inc(1) + } + if owner != (common.Hash{}) && rawdb.ExistsStorageTrieNode(s.db, owner, path) { + rawdb.DeleteStorageTrieNode(batch, owner, path) + deletionGauge.Inc(1) + } + lookupGauge.Inc(1) +} + // loadSyncStatus retrieves a previously aborted sync status from the database, // or generates a fresh one if none is available. func (s *Syncer) loadSyncStatus() { @@ -713,23 +726,59 @@ func (s *Syncer) loadSyncStatus() { } s.tasks = progress.Tasks for _, task := range s.tasks { + task := task // closure for task.genBatch in the stacktrie writer callback + task.genBatch = ethdb.HookedBatch{ Batch: s.db.NewBatch(), OnPut: func(key []byte, value []byte) { s.accountBytes += common.StorageSize(len(key) + len(value)) }, } - task.genTrie = trie.NewStackTrie(task.genBatch) + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(task.genBatch, common.Hash{}, path, hash, blob, s.scheme) + }) + if s.scheme == rawdb.PathScheme { + // Configure the dangling node cleaner and also filter out boundary nodes + // only in the context of the path scheme. Deletion is forbidden in the + // hash scheme, as it can disrupt state completeness. + options = options.WithCleaner(func(path []byte) { + s.cleanPath(task.genBatch, common.Hash{}, path) + }) + // Skip the left boundary if it's not the first range. + // Skip the right boundary if it's not the last range. + options = options.WithSkipBoundary(task.Next != (common.Hash{}), task.Last != common.MaxHash, boundaryAccountNodesGauge) + } + task.genTrie = trie.NewStackTrie(options) - for _, subtasks := range task.SubTasks { + for accountHash, subtasks := range task.SubTasks { for _, subtask := range subtasks { + subtask := subtask // closure for subtask.genBatch in the stacktrie writer callback + subtask.genBatch = ethdb.HookedBatch{ Batch: s.db.NewBatch(), OnPut: func(key []byte, value []byte) { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } - subtask.genTrie = trie.NewStackTrie(subtask.genBatch) + owner := accountHash // local assignment for stacktrie writer closure + options := trie.NewStackTrieOptions() + + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(subtask.genBatch, owner, path, hash, blob, s.scheme) + }) + if s.scheme == rawdb.PathScheme { + // Configure the dangling node cleaner and also filter out boundary nodes + // only in the context of the path scheme. Deletion is forbidden in the + // hash scheme, as it can disrupt state completeness. + options = options.WithCleaner(func(path []byte) { + s.cleanPath(subtask.genBatch, owner, path) + }) + // Skip the left boundary if it's not the first range. + // Skip the right boundary if it's not the last range. + options = options.WithSkipBoundary(subtask.Next != common.Hash{}, subtask.Last != common.MaxHash, boundaryStorageNodesGauge) + } + subtask.genTrie = trie.NewStackTrie(options) } } } @@ -749,7 +798,7 @@ func (s *Syncer) loadSyncStatus() { return } } - // Either we've failed to decode the previus state, or there was none. + // Either we've failed to decode the previous state, or there was none. // Start a fresh sync by chunking up the account range and scheduling // them for retrieval. s.tasks = nil @@ -778,12 +827,27 @@ func (s *Syncer) loadSyncStatus() { s.accountBytes += common.StorageSize(len(key) + len(value)) }, } + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, common.Hash{}, path, hash, blob, s.scheme) + }) + if s.scheme == rawdb.PathScheme { + // Configure the dangling node cleaner and also filter out boundary nodes + // only in the context of the path scheme. Deletion is forbidden in the + // hash scheme, as it can disrupt state completeness. + options = options.WithCleaner(func(path []byte) { + s.cleanPath(batch, common.Hash{}, path) + }) + // Skip the left boundary if it's not the first range. + // Skip the right boundary if it's not the last range. + options = options.WithSkipBoundary(next != common.Hash{}, last != common.MaxHash, boundaryAccountNodesGauge) + } s.tasks = append(s.tasks, &accountTask{ Next: next, Last: last, SubTasks: make(map[common.Hash][]*storageTask), genBatch: batch, - genTrie: trie.NewStackTrie(batch), + genTrie: trie.NewStackTrie(options), }) log.Debug("Created account sync task", "from", next, "last", last) next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) @@ -1274,9 +1338,9 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai want = maxTrieRequestCount + maxCodeRequestCount ) if have < want { - nodes, paths, codes := s.healer.scheduler.Missing(want - have) - for i, hash := range nodes { - s.healer.trieTasks[hash] = paths[i] + paths, hashes, codes := s.healer.scheduler.Missing(want - have) + for i, path := range paths { + s.healer.trieTasks[path] = hashes[i] } for _, hash := range codes { s.healer.codeTasks[hash] = struct{}{} @@ -1321,21 +1385,20 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai } var ( hashes = make([]common.Hash, 0, cap) - paths = make([]trie.SyncPath, 0, cap) + paths = make([]string, 0, cap) pathsets = make([]TrieNodePathSet, 0, cap) ) - for hash, pathset := range s.healer.trieTasks { - delete(s.healer.trieTasks, hash) + for path, hash := range s.healer.trieTasks { + delete(s.healer.trieTasks, path) + paths = append(paths, path) hashes = append(hashes, hash) - paths = append(paths, pathset) - - if len(hashes) >= cap { + if len(paths) >= cap { break } } // Group requests by account hash - hashes, paths, pathsets = sortByAccountPath(hashes, paths) + paths, hashes, _, pathsets = sortByAccountPath(paths, hashes) req := &trienodeHealRequest{ peer: idle, id: reqid, @@ -1344,8 +1407,8 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai revert: fail, cancel: cancel, stale: make(chan struct{}), - hashes: hashes, paths: paths, + hashes: hashes, task: s.healer, } req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() { @@ -1403,9 +1466,9 @@ func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fai want = maxTrieRequestCount + maxCodeRequestCount ) if have < want { - nodes, paths, codes := s.healer.scheduler.Missing(want - have) - for i, hash := range nodes { - s.healer.trieTasks[hash] = paths[i] + paths, hashes, codes := s.healer.scheduler.Missing(want - have) + for i, path := range paths { + s.healer.trieTasks[path] = hashes[i] } for _, hash := range codes { s.healer.codeTasks[hash] = struct{}{} @@ -1701,10 +1764,10 @@ func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { s.lock.Unlock() // If there's a timeout timer still running, abort it and mark the trie node - // retrievals as not-pending, ready for resheduling + // retrievals as not-pending, ready for rescheduling req.timeout.Stop() - for i, hash := range req.hashes { - req.task.trieTasks[hash] = req.paths[i] + for i, path := range req.paths { + req.task.trieTasks[path] = req.hashes[i] } } @@ -1797,7 +1860,7 @@ func (s *Syncer) processAccountResponse(res *accountResponse) { } // Check if the account is a contract with an unknown storage trie if account.Root != emptyRoot { - if node, err := s.db.Get(account.Root[:]); err != nil || node == nil { + if !rawdb.HasTrieNode(s.db, res.hashes[i], nil, account.Root, s.scheme) { // If there was a previous large state retrieval in progress, // don't restart it from scratch. This happens if a sync cycle // is interrupted and resumed later. However, *do* update the @@ -1921,6 +1984,7 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) { res.mainTask.needState[j] = false res.mainTask.pend-- + smallStorageGauge.Inc(1) } // If the last contract was chunked, mark it as needing healing // to avoid writing it out to disk prematurely. @@ -1956,7 +2020,11 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "chunks", chunks) } r := newHashRange(lastKey, chunks) - + if chunks == 1 { + smallStorageGauge.Inc(1) + } else { + largeStorageGauge.Inc(1) + } // Our first task is the one that was just filled by this response. batch := ethdb.HookedBatch{ Batch: s.db.NewBatch(), @@ -1964,12 +2032,25 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } + owner := account // local assignment for stacktrie writer closure + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme) + }) + if s.scheme == rawdb.PathScheme { + options = options.WithCleaner(func(path []byte) { + s.cleanPath(batch, owner, path) + }) + // Keep the left boundary as it's the first range. + // Skip the right boundary if it's not the last range. + options = options.WithSkipBoundary(false, r.End() != common.MaxHash, boundaryStorageNodesGauge) + } tasks = append(tasks, &storageTask{ Next: common.Hash{}, Last: r.End(), root: acc.Root, genBatch: batch, - genTrie: trie.NewStackTrie(batch), + genTrie: trie.NewStackTrie(options), }) for r.Next() { batch := ethdb.HookedBatch{ @@ -1978,12 +2059,27 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme) + }) + if s.scheme == rawdb.PathScheme { + // Configure the dangling node cleaner and also filter out boundary nodes + // only in the context of the path scheme. Deletion is forbidden in the + // hash scheme, as it can disrupt state completeness. + options = options.WithCleaner(func(path []byte) { + s.cleanPath(batch, owner, path) + }) + // Skip the left boundary as it's not the first range + // Skip the right boundary if it's not the last range. + options = options.WithSkipBoundary(true, r.End() != common.MaxHash, boundaryStorageNodesGauge) + } tasks = append(tasks, &storageTask{ Next: r.Start(), Last: r.End(), root: acc.Root, genBatch: batch, - genTrie: trie.NewStackTrie(batch), + genTrie: trie.NewStackTrie(options), }) } for _, task := range tasks { @@ -2028,7 +2124,23 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { slots += len(res.hashes[i]) if i < len(res.hashes)-1 || res.subTask == nil { - tr := trie.NewStackTrie(batch) + // no need to make local reassignment of account: this closure does not outlive the loop + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, account, path, hash, blob, s.scheme) + }) + if s.scheme == rawdb.PathScheme { + // Configure the dangling node cleaner only in the context of the + // path scheme. Deletion is forbidden in the hash scheme, as it can + // disrupt state completeness. + // + // Notably, boundary nodes can be also kept because the whole storage + // trie is complete. + options = options.WithCleaner(func(path []byte) { + s.cleanPath(batch, account, path) + }) + } + tr := trie.NewStackTrie(options) for j := 0; j < len(res.hashes[i]); j++ { tr.Update(res.hashes[i][j][:], res.slots[i][j]) } @@ -2050,18 +2162,25 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // Large contracts could have generated new trie nodes, flush them to disk if res.subTask != nil { if res.subTask.done { - if root, err := res.subTask.genTrie.Commit(); err != nil { - log.Error("Failed to commit stack slots", "err", err) - } else if root == res.subTask.root { - // If the chunk's root is an overflown but full delivery, clear the heal request + root := res.subTask.genTrie.Commit() + if err := res.subTask.genBatch.Write(); err != nil { + log.Error("Failed to persist stack slots", "err", err) + } + res.subTask.genBatch.Reset() + + // If the chunk's root is an overflown but full delivery, + // clear the heal request. + accountHash := res.accounts[len(res.accounts)-1] + if root == res.subTask.root && rawdb.HasStorageTrieNode(s.db, accountHash, nil, root) { for i, account := range res.mainTask.res.hashes { - if account == res.accounts[len(res.accounts)-1] { + if account == accountHash { res.mainTask.needHeal[i] = false + skipStorageHealingGauge.Inc(1) } } } } - if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize || res.subTask.done { + if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize { if err := res.subTask.genBatch.Write(); err != nil { log.Error("Failed to persist stack slots", "err", err) } @@ -2098,7 +2217,7 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { // If the trie node was not delivered, reschedule it if node == nil { - res.task.trieTasks[hash] = res.paths[i] + res.task.trieTasks[res.paths[i]] = res.hashes[i] continue } fills++ @@ -2107,7 +2226,7 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { s.trienodeHealSynced++ s.trienodeHealBytes += common.StorageSize(len(node)) - err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node}) + err := s.healer.scheduler.ProcessNode(trie.NodeSyncResult{Path: res.paths[i], Data: node}) switch err { case nil: case trie.ErrAlreadyProcessed: @@ -2187,7 +2306,7 @@ func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) { s.bytecodeHealSynced++ s.bytecodeHealBytes += common.StorageSize(len(node)) - err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node}) + err := s.healer.scheduler.ProcessCode(trie.CodeSyncResult{Hash: hash, Data: node}) switch err { case nil: case trie.ErrAlreadyProcessed: @@ -2234,13 +2353,13 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { if task.needCode[i] || task.needState[i] { break } - slim := snapshot.SlimAccountRLP(res.accounts[i].Nonce, res.accounts[i].Balance, res.accounts[i].Root, res.accounts[i].CodeHash) + slim := types.SlimAccountRLP(*res.accounts[i]) rawdb.WriteAccountSnapshot(batch, hash, slim) // If the task is complete, drop it into the stack trie to generate // account trie nodes for it if !task.needHeal[i] { - full, err := snapshot.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted + full, err := types.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted if err != nil { panic(err) // Really shouldn't ever happen } @@ -2268,9 +2387,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { // flush after finalizing task.done. It's fine even if we crash and lose this // write as it will only cause more data to be downloaded during heal. if task.done { - if _, err := task.genTrie.Commit(); err != nil { - log.Error("Failed to commit stack account", "err", err) - } + task.genTrie.Commit() } if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done { if err := task.genBatch.Write(); err != nil { @@ -2738,9 +2855,9 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error atomic.AddUint64(&s.trienodeHealPend, ^(fills - 1)) }() response := &trienodeHealResponse{ + paths: req.paths, task: req.task, hashes: req.hashes, - paths: req.paths, nodes: nodes, } select { @@ -2859,7 +2976,7 @@ func (s *Syncer) onHealState(paths [][]byte, value []byte) error { if err := rlp.DecodeBytes(value, &account); err != nil { return nil } - blob := snapshot.SlimAccountRLP(account.Nonce, account.Balance, account.Root, account.CodeHash) + blob := types.SlimAccountRLP(account) rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob) s.accountHealed += 1 s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob)) @@ -2986,8 +3103,9 @@ func (s *capacitySort) Swap(i, j int) { // healRequestSort implements the Sort interface, allowing sorting trienode // heal requests, which is a prerequisite for merging storage-requests. type healRequestSort struct { - hashes []common.Hash - paths []trie.SyncPath + paths []string + hashes []common.Hash + syncPaths []trie.SyncPath } func (t *healRequestSort) Len() int { @@ -2995,8 +3113,8 @@ func (t *healRequestSort) Len() int { } func (t *healRequestSort) Less(i, j int) bool { - a := t.paths[i] - b := t.paths[j] + a := t.syncPaths[i] + b := t.syncPaths[j] switch bytes.Compare(a[0], b[0]) { case -1: return true @@ -3017,8 +3135,9 @@ func (t *healRequestSort) Less(i, j int) bool { } func (t *healRequestSort) Swap(i, j int) { - t.hashes[i], t.hashes[j] = t.hashes[j], t.hashes[i] t.paths[i], t.paths[j] = t.paths[j], t.paths[i] + t.hashes[i], t.hashes[j] = t.hashes[j], t.hashes[i] + t.syncPaths[i], t.syncPaths[j] = t.syncPaths[j], t.syncPaths[i] } // Merge merges the pathsets, so that several storage requests concerning the @@ -3026,7 +3145,7 @@ func (t *healRequestSort) Swap(i, j int) { // OBS: This operation is moot if t has not first been sorted. func (t *healRequestSort) Merge() []TrieNodePathSet { var result []TrieNodePathSet - for _, path := range t.paths { + for _, path := range t.syncPaths { pathset := TrieNodePathSet([][]byte(path)) if len(path) == 1 { // It's an account reference. @@ -3035,7 +3154,7 @@ func (t *healRequestSort) Merge() []TrieNodePathSet { // It's a storage reference. end := len(result) - 1 if len(result) == 0 || !bytes.Equal(pathset[0], result[end][0]) { - // The account doesn't doesn't match last, create a new entry. + // The account doesn't match last, create a new entry. result = append(result, pathset) } else { // It's the same account as the previous one, add to the storage @@ -3049,9 +3168,13 @@ func (t *healRequestSort) Merge() []TrieNodePathSet { // sortByAccountPath takes hashes and paths, and sorts them. After that, it generates // the TrieNodePaths and merges paths which belongs to the same account path. -func sortByAccountPath(hashes []common.Hash, paths []trie.SyncPath) ([]common.Hash, []trie.SyncPath, []TrieNodePathSet) { - n := &healRequestSort{hashes, paths} +func sortByAccountPath(paths []string, hashes []common.Hash) ([]string, []common.Hash, []trie.SyncPath, []TrieNodePathSet) { + var syncPaths []trie.SyncPath + for _, path := range paths { + syncPaths = append(syncPaths, trie.NewSyncPath([]byte(path))) + } + n := &healRequestSort{paths, hashes, syncPaths} sort.Sort(n) pathsets := n.Merge() - return n.hashes, n.paths, pathsets + return n.paths, n.hashes, n.syncPaths, pathsets } diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index 47ab1f026d..2ee04c7d29 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -36,6 +36,8 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" + "github.com/ethereum/go-ethereum/trie/trienode" "golang.org/x/crypto/sha3" ) @@ -159,6 +161,13 @@ func newTestPeer(id string, t *testing.T, term func()) *testPeer { return peer } +func (t *testPeer) setStorageTries(tries map[common.Hash]*trie.Trie) { + t.storageTries = make(map[common.Hash]*trie.Trie) + for root, trie := range tries { + t.storageTries[root] = trie.Copy() + } +} + func (t *testPeer) ID() string { return t.id } func (t *testPeer) Log() log.Logger { return t.logger } @@ -367,7 +376,8 @@ func createStorageRequestResponse(t *testPeer, root common.Hash, accounts []comm return hashes, slots, proofs } -// the createStorageRequestResponseAlwaysProve tests a cornercase, where it always +// the createStorageRequestResponseAlwaysProve tests a cornercase, where it always +// // supplies the proof for the last account, even if it is 'complete'.h func createStorageRequestResponseAlwaysProve(t *testPeer, root common.Hash, accounts []common.Hash, bOrigin, bLimit []byte, max uint64) (hashes [][]common.Hash, slots [][][]byte, proofs [][]byte) { var size uint64 @@ -551,6 +561,11 @@ func noProofStorageRequestHandler(t *testPeer, requestId uint64, root common.Has // the remote side does not do any follow-up requests func TestSyncBloatedProof(t *testing.T) { t.Parallel() + testSyncBloatedProof(t, rawdb.HashScheme) + testSyncBloatedProof(t, rawdb.PathScheme) +} + +func testSyncBloatedProof(t *testing.T, scheme string) { var ( once sync.Once @@ -561,9 +576,10 @@ func TestSyncBloatedProof(t *testing.T) { }) } ) - sourceAccountTrie, elems := makeAccountTrieNoStorage(100) + + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(scheme, 100) source := newTestPeer("source", t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems source.accountRequestHandler = func(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, limit common.Hash, cap uint64) error { @@ -609,15 +625,15 @@ func TestSyncBloatedProof(t *testing.T) { } return nil } - syncer := setupSyncer(source) + syncer := setupSyncer(nodeScheme, source) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err == nil { t.Fatal("No error returned from incomplete/cancelled sync") } } -func setupSyncer(peers ...*testPeer) *Syncer { +func setupSyncer(scheme string, peers ...*testPeer) *Syncer { stateDb := rawdb.NewMemoryDatabase() - syncer := NewSyncer(stateDb) + syncer := NewSyncer(stateDb, scheme) for _, peer := range peers { syncer.Register(peer) peer.remote = syncer @@ -629,6 +645,11 @@ func setupSyncer(peers ...*testPeer) *Syncer { func TestSync(t *testing.T) { t.Parallel() + testSync(t, rawdb.HashScheme) + testSync(t, rawdb.PathScheme) +} + +func testSync(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -638,26 +659,30 @@ func TestSync(t *testing.T) { }) } ) - sourceAccountTrie, elems := makeAccountTrieNoStorage(100) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(scheme, 100) mkSource := func(name string) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems return source } - syncer := setupSyncer(mkSource("source")) + syncer := setupSyncer(nodeScheme, mkSource("source")) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { t.Fatalf("sync failed: %v", err) } - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncTinyTriePanic tests a basic sync with one peer, and a tiny trie. This caused a // panic within the prover func TestSyncTinyTriePanic(t *testing.T) { t.Parallel() + testSyncTinyTriePanic(t, rawdb.HashScheme) + testSyncTinyTriePanic(t, rawdb.PathScheme) +} +func testSyncTinyTriePanic(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -667,27 +692,31 @@ func TestSyncTinyTriePanic(t *testing.T) { }) } ) - sourceAccountTrie, elems := makeAccountTrieNoStorage(1) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(scheme, 1) mkSource := func(name string) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems return source } - syncer := setupSyncer(mkSource("source")) + syncer := setupSyncer(nodeScheme, mkSource("source")) done := checkStall(t, term) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestMultiSync tests a basic sync with multiple peers func TestMultiSync(t *testing.T) { t.Parallel() + testMultiSync(t, rawdb.HashScheme) + testMultiSync(t, rawdb.PathScheme) +} +func testMultiSync(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -697,27 +726,33 @@ func TestMultiSync(t *testing.T) { }) } ) - sourceAccountTrie, elems := makeAccountTrieNoStorage(100) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(scheme, 100) mkSource := func(name string) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems return source } - syncer := setupSyncer(mkSource("sourceA"), mkSource("sourceB")) + syncer := setupSyncer(nodeScheme, mkSource("sourceA"), mkSource("sourceB")) done := checkStall(t, term) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncWithStorage tests basic sync using accounts + storage + code func TestSyncWithStorage(t *testing.T) { t.Parallel() + testSyncWithStorage(t, rawdb.HashScheme) + testSyncWithStorage(t, rawdb.PathScheme) +} + +func testSyncWithStorage(t *testing.T, scheme string) { + var ( once sync.Once cancel = make(chan struct{}) @@ -727,29 +762,34 @@ func TestSyncWithStorage(t *testing.T) { }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(3, 3000, true, false) + // Create 3 accounts with 3000 storage slots each, code is true, and boundary is false. + nodeScheme, sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 3, 3000, true, false) mkSource := func(name string) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems source.storageTries = storageTries source.storageValues = storageElems return source } - syncer := setupSyncer(mkSource("sourceA")) + syncer := setupSyncer(nodeScheme, mkSource("sourceA")) done := checkStall(t, term) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestMultiSyncManyUseless contains one good peer, and many which doesn't return anything valuable at all func TestMultiSyncManyUseless(t *testing.T) { t.Parallel() + testMultiSyncManyUseless(t, rawdb.HashScheme) + testMultiSyncManyUseless(t, rawdb.PathScheme) +} +func testMultiSyncManyUseless(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -759,13 +799,13 @@ func TestMultiSyncManyUseless(t *testing.T) { }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true, false) + nodeScheme, sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false) mkSource := func(name string, noAccount, noStorage, noTrieNode bool) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems - source.storageTries = storageTries + source.setStorageTries(storageTries) source.storageValues = storageElems if !noAccount { @@ -780,7 +820,7 @@ func TestMultiSyncManyUseless(t *testing.T) { return source } - syncer := setupSyncer( + syncer := setupSyncer(nodeScheme, mkSource("full", true, true, true), mkSource("noAccounts", false, true, true), mkSource("noStorage", true, false, true), @@ -791,11 +831,17 @@ func TestMultiSyncManyUseless(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestMultiSyncManyUseless contains one good peer, and many which doesn't return anything valuable at all func TestMultiSyncManyUselessWithLowTimeout(t *testing.T) { + t.Parallel() + testMultiSyncManyUselessWithLowTimeout(t, rawdb.HashScheme) + testMultiSyncManyUselessWithLowTimeout(t, rawdb.PathScheme) +} + +func testMultiSyncManyUselessWithLowTimeout(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -805,13 +851,13 @@ func TestMultiSyncManyUselessWithLowTimeout(t *testing.T) { }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true, false) + nodeScheme, sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false) mkSource := func(name string, noAccount, noStorage, noTrieNode bool) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems - source.storageTries = storageTries + source.setStorageTries(storageTries) source.storageValues = storageElems if !noAccount { @@ -827,6 +873,7 @@ func TestMultiSyncManyUselessWithLowTimeout(t *testing.T) { } syncer := setupSyncer( + nodeScheme, mkSource("full", true, true, true), mkSource("noAccounts", false, true, true), mkSource("noStorage", true, false, true), @@ -842,11 +889,17 @@ func TestMultiSyncManyUselessWithLowTimeout(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestMultiSyncManyUnresponsive contains one good peer, and many which doesn't respond at all func TestMultiSyncManyUnresponsive(t *testing.T) { + t.Parallel() + testMultiSyncManyUnresponsive(t, rawdb.HashScheme) + testMultiSyncManyUnresponsive(t, rawdb.PathScheme) +} + +func testMultiSyncManyUnresponsive(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -856,13 +909,13 @@ func TestMultiSyncManyUnresponsive(t *testing.T) { }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true, false) + nodeScheme, sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false) mkSource := func(name string, noAccount, noStorage, noTrieNode bool) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems - source.storageTries = storageTries + source.setStorageTries(storageTries) source.storageValues = storageElems if !noAccount { @@ -878,6 +931,7 @@ func TestMultiSyncManyUnresponsive(t *testing.T) { } syncer := setupSyncer( + nodeScheme, mkSource("full", true, true, true), mkSource("noAccounts", false, true, true), mkSource("noStorage", true, false, true), @@ -891,7 +945,7 @@ func TestMultiSyncManyUnresponsive(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } func checkStall(t *testing.T, term func()) chan struct{} { @@ -912,7 +966,11 @@ func checkStall(t *testing.T, term func()) chan struct{} { // account trie has a few boundary elements. func TestSyncBoundaryAccountTrie(t *testing.T) { t.Parallel() + testSyncBoundaryAccountTrie(t, rawdb.HashScheme) + testSyncBoundaryAccountTrie(t, rawdb.PathScheme) +} +func testSyncBoundaryAccountTrie(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -922,15 +980,15 @@ func TestSyncBoundaryAccountTrie(t *testing.T) { }) } ) - sourceAccountTrie, elems := makeBoundaryAccountTrie(3000) + nodeScheme, sourceAccountTrie, elems := makeBoundaryAccountTrie(scheme, 3000) mkSource := func(name string) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems return source } - syncer := setupSyncer( + syncer := setupSyncer(nodeScheme, mkSource("peer-a"), mkSource("peer-b"), ) @@ -939,14 +997,18 @@ func TestSyncBoundaryAccountTrie(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncNoStorageAndOneCappedPeer tests sync using accounts and no storage, where one peer is // consistently returning very small results func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { t.Parallel() + testSyncNoStorageAndOneCappedPeer(t, rawdb.HashScheme) + testSyncNoStorageAndOneCappedPeer(t, rawdb.PathScheme) +} +func testSyncNoStorageAndOneCappedPeer(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -956,11 +1018,11 @@ func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { }) } ) - sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(scheme, 3000) mkSource := func(name string, slow bool) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems if slow { @@ -969,7 +1031,7 @@ func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { return source } - syncer := setupSyncer( + syncer := setupSyncer(nodeScheme, mkSource("nice-a", false), mkSource("nice-b", false), mkSource("nice-c", false), @@ -980,14 +1042,18 @@ func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncNoStorageAndOneCodeCorruptPeer has one peer which doesn't deliver // code requests properly. func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { t.Parallel() + testSyncNoStorageAndOneCodeCorruptPeer(t, rawdb.HashScheme) + testSyncNoStorageAndOneCodeCorruptPeer(t, rawdb.PathScheme) +} +func testSyncNoStorageAndOneCodeCorruptPeer(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -997,11 +1063,11 @@ func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { }) } ) - sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(scheme, 3000) mkSource := func(name string, codeFn codeHandlerFunc) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems source.codeRequestHandler = codeFn return source @@ -1010,7 +1076,7 @@ func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { // chance that the full set of codes requested are sent only to the // non-corrupt peer, which delivers everything in one go, and makes the // test moot - syncer := setupSyncer( + syncer := setupSyncer(nodeScheme, mkSource("capped", cappedCodeRequestHandler), mkSource("corrupt", corruptCodeRequestHandler), ) @@ -1019,12 +1085,16 @@ func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } func TestSyncNoStorageAndOneAccountCorruptPeer(t *testing.T) { t.Parallel() + testSyncNoStorageAndOneAccountCorruptPeer(t, rawdb.HashScheme) + testSyncNoStorageAndOneAccountCorruptPeer(t, rawdb.PathScheme) +} +func testSyncNoStorageAndOneAccountCorruptPeer(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -1034,11 +1104,11 @@ func TestSyncNoStorageAndOneAccountCorruptPeer(t *testing.T) { }) } ) - sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(scheme, 3000) mkSource := func(name string, accFn accountHandlerFunc) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems source.accountRequestHandler = accFn return source @@ -1047,7 +1117,7 @@ func TestSyncNoStorageAndOneAccountCorruptPeer(t *testing.T) { // chance that the full set of codes requested are sent only to the // non-corrupt peer, which delivers everything in one go, and makes the // test moot - syncer := setupSyncer( + syncer := setupSyncer(nodeScheme, mkSource("capped", defaultAccountRequestHandler), mkSource("corrupt", corruptAccountRequestHandler), ) @@ -1056,14 +1126,18 @@ func TestSyncNoStorageAndOneAccountCorruptPeer(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncNoStorageAndOneCodeCappedPeer has one peer which delivers code hashes // one by one func TestSyncNoStorageAndOneCodeCappedPeer(t *testing.T) { t.Parallel() + testSyncNoStorageAndOneCodeCappedPeer(t, rawdb.HashScheme) + testSyncNoStorageAndOneCodeCappedPeer(t, rawdb.PathScheme) +} +func testSyncNoStorageAndOneCodeCappedPeer(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -1073,11 +1147,11 @@ func TestSyncNoStorageAndOneCodeCappedPeer(t *testing.T) { }) } ) - sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(scheme, 3000) mkSource := func(name string, codeFn codeHandlerFunc) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems source.codeRequestHandler = codeFn return source @@ -1086,6 +1160,7 @@ func TestSyncNoStorageAndOneCodeCappedPeer(t *testing.T) { // so it shouldn't be more than that var counter int syncer := setupSyncer( + nodeScheme, mkSource("capped", func(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { counter++ return cappedCodeRequestHandler(t, id, hashes, max) @@ -1104,14 +1179,18 @@ func TestSyncNoStorageAndOneCodeCappedPeer(t *testing.T) { if threshold := 100; counter > threshold { t.Fatalf("Error, expected < %d invocations, got %d", threshold, counter) } - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncBoundaryStorageTrie tests sync against a few normal peers, but the // storage trie has a few boundary elements. func TestSyncBoundaryStorageTrie(t *testing.T) { t.Parallel() + testSyncBoundaryStorageTrie(t, rawdb.HashScheme) + testSyncBoundaryStorageTrie(t, rawdb.PathScheme) +} +func testSyncBoundaryStorageTrie(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -1121,17 +1200,17 @@ func TestSyncBoundaryStorageTrie(t *testing.T) { }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(10, 1000, false, true) + nodeScheme, sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 10, 1000, false, true) mkSource := func(name string) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems source.storageTries = storageTries source.storageValues = storageElems return source } - syncer := setupSyncer( + syncer := setupSyncer(nodeScheme, mkSource("peer-a"), mkSource("peer-b"), ) @@ -1140,14 +1219,18 @@ func TestSyncBoundaryStorageTrie(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncWithStorageAndOneCappedPeer tests sync using accounts + storage, where one peer is // consistently returning very small results func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { t.Parallel() + testSyncWithStorageAndOneCappedPeer(t, rawdb.HashScheme) + testSyncWithStorageAndOneCappedPeer(t, rawdb.PathScheme) +} +func testSyncWithStorageAndOneCappedPeer(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -1157,11 +1240,11 @@ func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(300, 1000, false, false) + nodeScheme, sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 300, 1000, false, false) mkSource := func(name string, slow bool) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems source.storageTries = storageTries source.storageValues = storageElems @@ -1172,7 +1255,7 @@ func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { return source } - syncer := setupSyncer( + syncer := setupSyncer(nodeScheme, mkSource("nice-a", false), mkSource("slow", true), ) @@ -1181,14 +1264,19 @@ func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncWithStorageAndCorruptPeer tests sync using accounts + storage, where one peer is // sometimes sending bad proofs func TestSyncWithStorageAndCorruptPeer(t *testing.T) { t.Parallel() + testSyncWithStorageAndCorruptPeer(t, rawdb.HashScheme) + + testSyncWithStorageAndCorruptPeer(t, rawdb.PathScheme) +} +func testSyncWithStorageAndCorruptPeer(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -1198,11 +1286,11 @@ func TestSyncWithStorageAndCorruptPeer(t *testing.T) { }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true, false) + nodeScheme, sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false) mkSource := func(name string, handler storageHandlerFunc) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems source.storageTries = storageTries source.storageValues = storageElems @@ -1210,7 +1298,7 @@ func TestSyncWithStorageAndCorruptPeer(t *testing.T) { return source } - syncer := setupSyncer( + syncer := setupSyncer(nodeScheme, mkSource("nice-a", defaultStorageRequestHandler), mkSource("nice-b", defaultStorageRequestHandler), mkSource("nice-c", defaultStorageRequestHandler), @@ -1221,12 +1309,16 @@ func TestSyncWithStorageAndCorruptPeer(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } func TestSyncWithStorageAndNonProvingPeer(t *testing.T) { t.Parallel() + testSyncWithStorageAndNonProvingPeer(t, rawdb.HashScheme) + testSyncWithStorageAndNonProvingPeer(t, rawdb.PathScheme) +} +func testSyncWithStorageAndNonProvingPeer(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -1236,18 +1328,18 @@ func TestSyncWithStorageAndNonProvingPeer(t *testing.T) { }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true, false) + nodeScheme, sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(scheme, 100, 3000, true, false) mkSource := func(name string, handler storageHandlerFunc) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems source.storageTries = storageTries source.storageValues = storageElems source.storageRequestHandler = handler return source } - syncer := setupSyncer( + syncer := setupSyncer(nodeScheme, mkSource("nice-a", defaultStorageRequestHandler), mkSource("nice-b", defaultStorageRequestHandler), mkSource("nice-c", defaultStorageRequestHandler), @@ -1258,7 +1350,7 @@ func TestSyncWithStorageAndNonProvingPeer(t *testing.T) { t.Fatalf("sync failed: %v", err) } close(done) - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } // TestSyncWithStorage tests basic sync using accounts + storage + code, against @@ -1267,6 +1359,11 @@ func TestSyncWithStorageAndNonProvingPeer(t *testing.T) { // did not mark the account for healing. func TestSyncWithStorageMisbehavingProve(t *testing.T) { t.Parallel() + testSyncWithStorageMisbehavingProve(t, rawdb.HashScheme) + testSyncWithStorageMisbehavingProve(t, rawdb.PathScheme) +} + +func testSyncWithStorageMisbehavingProve(t *testing.T, scheme string) { var ( once sync.Once cancel = make(chan struct{}) @@ -1276,22 +1373,22 @@ func TestSyncWithStorageMisbehavingProve(t *testing.T) { }) } ) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorageWithUniqueStorage(10, 30, false) + nodeScheme, sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorageWithUniqueStorage(scheme, 10, 30, false) mkSource := func(name string) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems source.storageTries = storageTries source.storageValues = storageElems source.storageRequestHandler = proofHappyStorageRequestHandler return source } - syncer := setupSyncer(mkSource("sourceA")) + syncer := setupSyncer(nodeScheme, mkSource("sourceA")) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { t.Fatalf("sync failed: %v", err) } - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) } type kv struct { @@ -1344,10 +1441,14 @@ func getCodeByHash(hash common.Hash) []byte { } // makeAccountTrieNoStorage spits out a trie, along with the leafs -func makeAccountTrieNoStorage(n int) (*trie.Trie, entrySlice) { - db := trie.NewDatabase(rawdb.NewMemoryDatabase()) - accTrie, _ := trie.New(common.Hash{}, db) - var entries entrySlice +func makeAccountTrieNoStorage(scheme string, n int) (string, *trie.Trie, entrySlice) { + // Create emptry Trie + var ( + db = trie.NewDatabase(rawdb.NewMemoryDatabase(), newDbConfig(scheme)) + accTrie = trie.NewEmpty(db) + entries entrySlice + ) + // Fill the trie with n accounts for i := uint64(1); i <= uint64(n); i++ { value, _ := rlp.EncodeToBytes(types.StateAccount{ Nonce: i, @@ -1357,24 +1458,31 @@ func makeAccountTrieNoStorage(n int) (*trie.Trie, entrySlice) { }) key := key32(i) elem := &kv{key, value} + // Update Account tries and keep the entries accTrie.Update(elem.k, elem.v) entries = append(entries, elem) } + // Sort anscending by key sort.Sort(entries) - accTrie.Commit(nil) - return accTrie, entries + // Commit the state changes into db and re-create the trie + // for accessing later. + root, nodes, _ := accTrie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + + accTrie, _ = trie.New(trie.StateTrieID(root), db) + return db.Scheme(), accTrie, entries } // makeBoundaryAccountTrie constructs an account trie. Instead of filling // accounts normally, this function will fill a few accounts which have // boundary hash. -func makeBoundaryAccountTrie(n int) (*trie.Trie, entrySlice) { +func makeBoundaryAccountTrie(scheme string, n int) (string, *trie.Trie, entrySlice) { var ( entries entrySlice boundaries []common.Hash - db = trie.NewDatabase(rawdb.NewMemoryDatabase()) - trie, _ = trie.New(common.Hash{}, db) + db = trie.NewDatabase(rawdb.NewMemoryDatabase(), newDbConfig(scheme)) + accTrie = trie.NewEmpty(db) ) // Initialize boundaries var next common.Hash @@ -1401,7 +1509,7 @@ func makeBoundaryAccountTrie(n int) (*trie.Trie, entrySlice) { CodeHash: getCodeHash(uint64(i)), }) elem := &kv{boundaries[i].Bytes(), value} - trie.Update(elem.k, elem.v) + accTrie.Update(elem.k, elem.v) entries = append(entries, elem) } // Fill other accounts if required @@ -1413,23 +1521,30 @@ func makeBoundaryAccountTrie(n int) (*trie.Trie, entrySlice) { CodeHash: getCodeHash(i), }) elem := &kv{key32(i), value} - trie.Update(elem.k, elem.v) + accTrie.Update(elem.k, elem.v) entries = append(entries, elem) } sort.Sort(entries) - trie.Commit(nil) - return trie, entries + // Commit the state changes into db and re-create the trie + // for accessing later. + root, nodes, _ := accTrie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + + accTrie, _ = trie.New(trie.StateTrieID(root), db) + return db.Scheme(), accTrie, entries } // makeAccountTrieWithStorageWithUniqueStorage creates an account trie where each accounts -// has a unique storage set. -func makeAccountTrieWithStorageWithUniqueStorage(accounts, slots int, code bool) (*trie.Trie, entrySlice, map[common.Hash]*trie.Trie, map[common.Hash]entrySlice) { +// has a unique storage set. Code is true when u pass a random code hash to the account +func makeAccountTrieWithStorageWithUniqueStorage(scheme string, accounts, slots int, code bool) (string, *trie.Trie, entrySlice, map[common.Hash]*trie.Trie, map[common.Hash]entrySlice) { var ( - db = trie.NewDatabase(rawdb.NewMemoryDatabase()) - accTrie, _ = trie.New(common.Hash{}, db) + db = trie.NewDatabase(rawdb.NewMemoryDatabase(), newDbConfig(scheme)) + accTrie = trie.NewEmpty(db) entries entrySlice + storageRoots = make(map[common.Hash]common.Hash) storageTries = make(map[common.Hash]*trie.Trie) storageEntries = make(map[common.Hash]entrySlice) + nodes = trienode.NewMergedNodeSet() ) // Create n accounts in the trie for i := uint64(1); i <= uint64(accounts); i++ { @@ -1439,9 +1554,8 @@ func makeAccountTrieWithStorageWithUniqueStorage(accounts, slots int, code bool) codehash = getCodeHash(i) } // Create a storage trie - stTrie, stEntries := makeStorageTrieWithSeed(uint64(slots), i, db) - stRoot := stTrie.Hash() - stTrie.Commit(nil) + stRoot, stNodes, stEntries := makeStorageTrieWithSeed(common.BytesToHash(key), uint64(slots), i, db) + nodes.Merge(stNodes) value, _ := rlp.EncodeToBytes(types.StateAccount{ Nonce: i, Balance: big.NewInt(int64(i)), @@ -1452,36 +1566,39 @@ func makeAccountTrieWithStorageWithUniqueStorage(accounts, slots int, code bool) accTrie.Update(elem.k, elem.v) entries = append(entries, elem) - storageTries[common.BytesToHash(key)] = stTrie + storageRoots[common.BytesToHash(key)] = stRoot storageEntries[common.BytesToHash(key)] = stEntries } sort.Sort(entries) - accTrie.Commit(nil) - return accTrie, entries, storageTries, storageEntries + // Commit account trie + root, set, _ := accTrie.Commit(true) + nodes.Merge(set) + + // Commit gathered dirty nodes into database + db.Update(root, types.EmptyRootHash, 0, nodes, nil) + + // Re-create tries with new root + accTrie, _ = trie.New(trie.StateTrieID(root), db) + for i := uint64(1); i <= uint64(accounts); i++ { + key := key32(i) + trie, _ := trie.New(trie.StorageTrieID(root, common.BytesToHash(key), storageRoots[common.BytesToHash(key)]), db) + storageTries[common.BytesToHash(key)] = trie + } + return db.Scheme(), accTrie, entries, storageTries, storageEntries } // makeAccountTrieWithStorage spits out a trie, along with the leafs -func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (*trie.Trie, entrySlice, map[common.Hash]*trie.Trie, map[common.Hash]entrySlice) { +func makeAccountTrieWithStorage(scheme string, accounts, slots int, code, boundary bool) (string, *trie.Trie, entrySlice, map[common.Hash]*trie.Trie, map[common.Hash]entrySlice) { var ( - db = trie.NewDatabase(rawdb.NewMemoryDatabase()) - accTrie, _ = trie.New(common.Hash{}, db) + db = trie.NewDatabase(rawdb.NewMemoryDatabase(), newDbConfig(scheme)) + accTrie = trie.NewEmpty(db) entries entrySlice + storageRoots = make(map[common.Hash]common.Hash) storageTries = make(map[common.Hash]*trie.Trie) storageEntries = make(map[common.Hash]entrySlice) + nodes = trienode.NewMergedNodeSet() ) - // Make a storage trie which we reuse for the whole lot - var ( - stTrie *trie.Trie - stEntries entrySlice - ) - if boundary { - stTrie, stEntries = makeBoundaryStorageTrie(slots, db) - } else { - stTrie, stEntries = makeStorageTrieWithSeed(uint64(slots), 0, db) - } - stRoot := stTrie.Hash() - // Create n accounts in the trie for i := uint64(1); i <= uint64(accounts); i++ { key := key32(i) @@ -1489,7 +1606,20 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (*trie if code { codehash = getCodeHash(i) } - value, _ := rlp.EncodeToBytes(types.StateAccount{ + // Make a storage trie + var ( + stRoot common.Hash + stNodes *trienode.NodeSet + stEntries entrySlice + ) + if boundary { + stRoot, stNodes, stEntries = makeBoundaryStorageTrie(common.BytesToHash(key), slots, db) + } else { + stRoot, stNodes, stEntries = makeStorageTrieWithSeed(common.BytesToHash(key), uint64(slots), 0, db) + } + nodes.Merge(stNodes) + + value, _ := rlp.EncodeToBytes(&types.StateAccount{ Nonce: i, Balance: big.NewInt(int64(i)), Root: stRoot, @@ -1499,20 +1629,39 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (*trie accTrie.Update(elem.k, elem.v) entries = append(entries, elem) // we reuse the same one for all accounts - storageTries[common.BytesToHash(key)] = stTrie + storageRoots[common.BytesToHash(key)] = stRoot storageEntries[common.BytesToHash(key)] = stEntries } sort.Sort(entries) - stTrie.Commit(nil) - accTrie.Commit(nil) - return accTrie, entries, storageTries, storageEntries + // Commit account trie + root, set, _ := accTrie.Commit(true) + nodes.Merge(set) + + // Commit gathered dirty nodes into database + db.Update(root, types.EmptyRootHash, 0, nodes, nil) + + // Re-create tries with new root + accTrie, err := trie.New(trie.StateTrieID(root), db) + if err != nil { + panic(err) + } + for i := uint64(1); i <= uint64(accounts); i++ { + key := key32(i) + id := trie.StorageTrieID(root, common.BytesToHash(key), storageRoots[common.BytesToHash(key)]) + trie, err := trie.New(id, db) + if err != nil { + panic(err) + } + storageTries[common.BytesToHash(key)] = trie + } + return db.Scheme(), accTrie, entries, storageTries, storageEntries } // makeStorageTrieWithSeed fills a storage trie with n items, returning the // not-yet-committed trie and the sorted entries. The seeds can be used to ensure // that tries are unique. -func makeStorageTrieWithSeed(n, seed uint64, db *trie.Database) (*trie.Trie, entrySlice) { - trie, _ := trie.New(common.Hash{}, db) +func makeStorageTrieWithSeed(owner common.Hash, n, seed uint64, db *trie.Database) (common.Hash, *trienode.NodeSet, entrySlice) { + trie, _ := trie.New(trie.StorageTrieID(types.EmptyRootHash, owner, types.EmptyRootHash), db) var entries entrySlice for i := uint64(1); i <= n; i++ { // store 'x' at slot 'x' @@ -1527,18 +1676,18 @@ func makeStorageTrieWithSeed(n, seed uint64, db *trie.Database) (*trie.Trie, ent entries = append(entries, elem) } sort.Sort(entries) - trie.Commit(nil) - return trie, entries + root, nodes, _ := trie.Commit(false) + return root, nodes, entries } // makeBoundaryStorageTrie constructs a storage trie. Instead of filling // storage slots normally, this function will fill a few slots which have // boundary hash. -func makeBoundaryStorageTrie(n int, db *trie.Database) (*trie.Trie, entrySlice) { +func makeBoundaryStorageTrie(owner common.Hash, n int, db *trie.Database) (common.Hash, *trienode.NodeSet, entrySlice) { var ( entries entrySlice boundaries []common.Hash - trie, _ = trie.New(common.Hash{}, db) + trie, _ = trie.New(trie.StorageTrieID(types.EmptyRootHash, owner, types.EmptyRootHash), db) ) // Initialize boundaries var next common.Hash @@ -1578,19 +1727,19 @@ func makeBoundaryStorageTrie(n int, db *trie.Database) (*trie.Trie, entrySlice) entries = append(entries, elem) } sort.Sort(entries) - trie.Commit(nil) - return trie, entries + root, nodes, _ := trie.Commit(false) + return root, nodes, entries } -func verifyTrie(db ethdb.KeyValueStore, root common.Hash, t *testing.T) { +func verifyTrie(scheme string, db ethdb.KeyValueStore, root common.Hash, t *testing.T) { t.Helper() - triedb := trie.NewDatabase(db) - accTrie, err := trie.New(root, triedb) + triedb := trie.NewDatabase(rawdb.NewDatabase(db), newDbConfig(scheme)) + accTrie, err := trie.New(trie.StateTrieID(root), triedb) if err != nil { t.Fatal(err) } accounts, slots := 0, 0 - accIt := trie.NewIterator(accTrie.NodeIterator(nil)) + accIt := trie.NewIterator(accTrie.MustNodeIterator(nil)) for accIt.Next() { var acc struct { Nonce uint64 @@ -1602,12 +1751,14 @@ func verifyTrie(db ethdb.KeyValueStore, root common.Hash, t *testing.T) { log.Crit("Invalid account encountered during snapshot creation", "err", err) } accounts++ - if acc.Root != emptyRoot { - storeTrie, err := trie.NewSecure(acc.Root, triedb) + if acc.Root != types.EmptyRootHash { + id := trie.StorageTrieID(root, common.BytesToHash(accIt.Key), acc.Root) + storeTrie, err := trie.NewSecure(id, triedb) if err != nil { t.Fatal(err) } - storeIt := trie.NewIterator(storeTrie.NodeIterator(nil)) + + storeIt := trie.NewIterator(storeTrie.MustNodeIterator(nil)) for storeIt.Next() { slots++ } @@ -1625,6 +1776,12 @@ func verifyTrie(db ethdb.KeyValueStore, root common.Hash, t *testing.T) { // TestSyncAccountPerformance tests how efficient the snap algo is at minimizing // state healing func TestSyncAccountPerformance(t *testing.T) { + t.Parallel() + testSyncAccountPerformance(t, rawdb.HashScheme) + testSyncAccountPerformance(t, rawdb.PathScheme) +} + +func testSyncAccountPerformance(t *testing.T, scheme string) { // Set the account concurrency to 1. This _should_ result in the // range root to become correct, and there should be no healing needed defer func(old int) { accountConcurrency = old }(accountConcurrency) @@ -1639,20 +1796,20 @@ func TestSyncAccountPerformance(t *testing.T) { }) } ) - sourceAccountTrie, elems := makeAccountTrieNoStorage(100) + nodeScheme, sourceAccountTrie, elems := makeAccountTrieNoStorage(scheme, 100) mkSource := func(name string) *testPeer { source := newTestPeer(name, t, term) - source.accountTrie = sourceAccountTrie + source.accountTrie = sourceAccountTrie.Copy() source.accountValues = elems return source } src := mkSource("source") - syncer := setupSyncer(src) + syncer := setupSyncer(nodeScheme, src) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { t.Fatalf("sync failed: %v", err) } - verifyTrie(syncer.db, sourceAccountTrie.Hash(), t) + verifyTrie(scheme, syncer.db, sourceAccountTrie.Hash(), t) // The trie root will always be requested, since it is added when the snap // sync cycle starts. When popping the queue, we do not look it up again. // Doing so would bring this number down to zero in this artificial testcase, @@ -1712,3 +1869,10 @@ func TestSlotEstimation(t *testing.T) { } } } + +func newDbConfig(scheme string) *trie.Config { + if scheme == rawdb.HashScheme { + return &trie.Config{} + } + return &trie.Config{PathDB: pathdb.Defaults} +} diff --git a/eth/state_accessor.go b/eth/state_accessor.go index d5d06da672..c4cac850bf 100644 --- a/eth/state_accessor.go +++ b/eth/state_accessor.go @@ -27,6 +27,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/consensus/consortium" "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" @@ -38,38 +39,24 @@ import ( // for releasing state. var noopReleaser = tracers.StateReleaseFunc(func() {}) -// StateAtBlock retrieves the state database associated with a certain block. -// If no state is locally available for the given block, a number of blocks -// are attempted to be reexecuted to generate the desired state. The optional -// base layer statedb can be passed then it's regarded as the statedb of the -// parent block. -// Parameters: -// - block: The block for which we want the state (== state at the stateRoot of the parent) -// - reexec: The maximum number of blocks to reprocess trying to obtain the desired state -// - base: If the caller is tracing multiple blocks, the caller can provide the parent state -// continuously from the callsite. -// - checklive: if true, then the live 'blockchain' state database is used. If the caller want to -// perform Commit or other 'save-to-disk' changes, this should be set to false to avoid -// storing trash persistently -// - preferDisk: this arg can be used by the caller to signal that even though the 'base' is provided, -// it would be preferrable to start from a fresh state, if we have it on disk. -func (eth *Ethereum) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool, preferDisk bool) (statedb *state.StateDB, release tracers.StateReleaseFunc, err error) { +func (eth *Ethereum) hashState(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, readOnly bool, preferDisk bool) (statedb *state.StateDB, release tracers.StateReleaseFunc, err error) { var ( current *types.Block database state.Database + triedb *trie.Database report = true origin = block.NumberU64() ) // The state is only for reading purposes, check the state presence in // live database. - if checkLive { + if readOnly { // The state is available in live database, create a reference // on top to prevent garbage collection and return a release // function to deref it. - statedb, err = eth.blockchain.StateAt(block.Root()) - if err == nil { + if statedb, err = eth.blockchain.StateAt(block.Root()); err == nil { + eth.blockchain.TrieDB().Reference(block.Root(), common.Hash{}) return statedb, func() { - statedb.Database().TrieDB().Dereference(block.Root()) + eth.blockchain.TrieDB().Dereference(block.Root()) }, nil } } @@ -80,27 +67,32 @@ func (eth *Ethereum) StateAtBlock(ctx context.Context, block *types.Block, reexe if preferDisk { // Create an ephemeral trie.Database for isolating the live one. Otherwise // the internal junks created by tracing will be persisted into the disk. - database = state.NewDatabaseWithConfig(eth.chainDb, &trie.Config{Cache: 16}) + // TODO(rjl493456442), clean cache is disabled to prevent memory leak, + // please re-enable it for better performance. + database = state.NewDatabaseWithConfig(eth.chainDb, trie.HashDefaults) if statedb, err = state.New(block.Root(), database, nil); err == nil { log.Info("Found disk backend for state trie", "root", block.Root(), "number", block.Number()) return statedb, noopReleaser, nil } } // The optional base statedb is given, mark the start point as parent block - statedb, database, report = base, base.Database(), false + statedb, database, triedb, report = base, base.Database(), base.Database().TrieDB(), false current = eth.blockchain.GetBlock(block.ParentHash(), block.NumberU64()-1) } else { - // Otherwise try to reexec blocks until we find a state or reach our limit + // Otherwise, try to reexec blocks until we find a state or reach our limit current = block // Create an ephemeral trie.Database for isolating the live one. Otherwise // the internal junks created by tracing will be persisted into the disk. - database = state.NewDatabaseWithConfig(eth.chainDb, &trie.Config{Cache: 16}) + // TODO(rjl493456442), clean cache is disabled to prevent memory leak, + // please re-enable it for better performance. + triedb = trie.NewDatabase(eth.chainDb, trie.HashDefaults) + database = state.NewDatabaseWithNodeDB(eth.chainDb, triedb) // If we didn't check the live database, do check state over ephemeral database, // otherwise we would rewind past a persisted block (specific corner case is // chain tracing from the genesis). - if !checkLive { + if !readOnly { statedb, err = state.New(current.Root(), database, nil) if err == nil { return statedb, noopReleaser, nil @@ -108,6 +100,9 @@ func (eth *Ethereum) StateAtBlock(ctx context.Context, block *types.Block, reexe } // Database does not have the state for the given block, try to regenerate for i := uint64(0); i < reexec; i++ { + if err := ctx.Err(); err != nil { + return nil, nil, err + } if current.NumberU64() == 0 { return nil, nil, errors.New("genesis state is missing") } @@ -157,7 +152,7 @@ func (eth *Ethereum) StateAtBlock(ctx context.Context, block *types.Block, reexe return nil, nil, fmt.Errorf("processing block %d failed: %v", current.NumberU64(), err) } // Finalize the state so any modifications are written to the trie - root, err := statedb.Commit(eth.blockchain.Config().IsEIP158(current.Number())) + root, err := statedb.Commit(current.NumberU64(), eth.blockchain.Config().IsEIP158(current.Number())) if err != nil { return nil, nil, fmt.Errorf("stateAtBlock commit failed, number %d root %v: %w", current.NumberU64(), current.Root().Hex(), err) @@ -168,17 +163,58 @@ func (eth *Ethereum) StateAtBlock(ctx context.Context, block *types.Block, reexe } // Hold the state reference and also drop the parent state // to prevent accumulating too many nodes in memory. - database.TrieDB().Reference(root, common.Hash{}) + triedb.Reference(root, common.Hash{}) if parent != (common.Hash{}) { - database.TrieDB().Dereference(parent) + triedb.Dereference(parent) } parent = root } if report { - nodes, imgs := database.TrieDB().Size() + nodes, imgs := triedb.Size() log.Info("Historical state regenerated", "block", current.NumberU64(), "elapsed", time.Since(start), "nodes", nodes, "preimages", imgs) } - return statedb, func() { database.TrieDB().Dereference(block.Root()) }, nil + return statedb, func() { triedb.Dereference(block.Root()) }, nil +} + +func (eth *Ethereum) pathState(block *types.Block) (*state.StateDB, func(), error) { + // Check if the requested state is available in the live chain. + statedb, err := eth.blockchain.StateAt(block.Root()) + if err == nil { + return statedb, noopReleaser, nil + } + // TODO historic state is not supported in path-based scheme. + // Fully archive node in pbss will be implemented by relying + // on state history, but needs more work on top. + return nil, nil, errors.New("historical state not available in path scheme yet") +} + +// stateAtBlock retrieves the state database associated with a certain block. +// If no state is locally available for the given block, a number of blocks +// are attempted to be reexecuted to generate the desired state. The optional +// base layer statedb can be provided which is regarded as the statedb of the +// parent block. +// +// An additional release function will be returned if the requested state is +// available. Release is expected to be invoked when the returned state is no +// longer needed. Its purpose is to prevent resource leaking. Though it can be +// noop in some cases. +// +// Parameters: +// - block: The block for which we want the state(state = block.Root) +// - reexec: The maximum number of blocks to reprocess trying to obtain the desired state +// - base: If the caller is tracing multiple blocks, the caller can provide the parent +// state continuously from the callsite. +// - readOnly: If true, then the live 'blockchain' state database is used. No mutation should +// be made from caller, e.g. perform Commit or other 'save-to-disk' changes. +// Otherwise, the trash generated by caller may be persisted permanently. +// - preferDisk: This arg can be used by the caller to signal that even though the 'base' is +// provided, it would be preferable to start from a fresh state, if we have it +// on disk. +func (eth *Ethereum) stateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, readOnly bool, preferDisk bool) (statedb *state.StateDB, release tracers.StateReleaseFunc, err error) { + if eth.blockchain.TrieDB().Scheme() == rawdb.HashScheme { + return eth.hashState(ctx, block, reexec, base, readOnly, preferDisk) + } + return eth.pathState(block) } // stateAtTransaction returns the execution environment of a certain transaction. @@ -194,7 +230,7 @@ func (eth *Ethereum) stateAtTransaction(ctx context.Context, block *types.Block, } // Lookup the statedb of parent block from the live database, // otherwise regenerate it on the flight. - statedb, release, err := eth.StateAtBlock(ctx, parent, reexec, nil, true, false) + statedb, release, err := eth.stateAtBlock(ctx, parent, reexec, nil, true, false) if err != nil { return nil, vm.BlockContext{}, nil, nil, err } diff --git a/eth/sync.go b/eth/sync.go index ecbbcc3e05..386132bf40 100644 --- a/eth/sync.go +++ b/eth/sync.go @@ -190,15 +190,24 @@ func (cs *chainSyncer) modeAndLocalHead() (downloader.SyncMode, *big.Int) { } // We are probably in full sync, but we might have rewound to before the // fast sync pivot, check if we should reenable + head := cs.handler.chain.CurrentBlock() if pivot := rawdb.ReadLastPivotNumber(cs.handler.database); pivot != nil { - if head := cs.handler.chain.CurrentBlock(); head.NumberU64() < *pivot { + if head.NumberU64() < *pivot { block := cs.handler.chain.CurrentFastBlock() td := cs.handler.chain.GetTd(block.Hash(), block.NumberU64()) return downloader.FastSync, td } } + // We are in a full sync, but the associated head state is missing. To complete + // the head state, forcefully rerun the snap sync. Note it doesn't mean the + // persistent state is corrupted, just mismatch with the head block. + if !cs.handler.chain.HasState(head.Root()) { + block := cs.handler.chain.CurrentFastBlock() + td := cs.handler.chain.GetTd(block.Hash(), block.NumberU64()) + log.Info("Reenabled snap sync as chain is stateless") + return downloader.SnapSync, td + } // Nope, we're really full syncing - head := cs.handler.chain.CurrentBlock() td := cs.handler.chain.GetTd(head.Hash(), head.NumberU64()) return downloader.FullSync, td } @@ -234,22 +243,14 @@ func (h *handler) doSync(op *chainSyncOp) error { if err != nil { return err } - if atomic.LoadUint32(&h.fastSync) == 1 { - log.Info("Fast sync complete, auto disabling") - atomic.StoreUint32(&h.fastSync, 0) - } - if atomic.LoadUint32(&h.snapSync) == 1 { - log.Info("Snap sync complete, auto disabling") - atomic.StoreUint32(&h.snapSync, 0) - } - // If we've successfully finished a sync cycle and passed any required checkpoint, - // enable accepting transactions from the network. + h.enableSyncedFeatures() + head := h.chain.CurrentBlock() if head.NumberU64() >= h.checkpointNumber { // Checkpoint passed, sanity check the timestamp to have a fallback mechanism // for non-checkpointed (number = 0) private networks. if head.Time() >= uint64(time.Now().AddDate(0, -1, 0).Unix()) { - atomic.StoreUint32(&h.acceptTxs, 1) + atomic.StoreUint32(&h.synced, 1) } } if head.NumberU64() > 0 { diff --git a/eth/tracers/api_test.go b/eth/tracers/api_test.go index 8f46207fab..0cc8a85cbe 100644 --- a/eth/tracers/api_test.go +++ b/eth/tracers/api_test.go @@ -46,6 +46,7 @@ import ( "github.com/ethereum/go-ethereum/internal/ethapi" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rpc" + "github.com/ethereum/go-ethereum/trie" ) var ( @@ -84,13 +85,13 @@ func newTestBackend(t *testing.T, n int, gspec *core.Genesis, generator func(i i TrieDirtyDisabled: true, // Archive mode } - _, _, genesisErr := core.SetupGenesisBlockWithOverride(backend.chaindb, gspec, nil, true) + _, _, genesisErr := core.SetupGenesisBlockWithOverride(backend.chaindb, trie.NewDatabase(backend.chaindb, nil), gspec, nil, true) if _, ok := genesisErr.(*params.ConfigCompatError); genesisErr != nil && !ok { t.Fatal(genesisErr.Error()) } //chainDb, cacheConfig, chainConfig, eth.engine, vmConfig, eth.shouldPreserve, &config.TxLookupLimit - chain, err := core.NewBlockChain(backend.chaindb, cacheConfig, gspec.Config, backend.engine, vm.Config{}, nil, nil) + chain, err := core.NewBlockChain(backend.chaindb, cacheConfig, gspec, nil, backend.engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } diff --git a/eth/tracers/internal/tracetest/calltrace2_test.go b/eth/tracers/internal/tracetest/calltrace2_test.go index d0559e2804..c854828885 100644 --- a/eth/tracers/internal/tracetest/calltrace2_test.go +++ b/eth/tracers/internal/tracetest/calltrace2_test.go @@ -162,8 +162,9 @@ func testCallTracer2(tracerName string, dirPath string, t *testing.T) { Difficulty: (*big.Int)(test.Context.Difficulty), GasLimit: uint64(test.Context.GasLimit), } - _, statedb = tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false) + triedb, _, statedb = tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false, rawdb.HashScheme) ) + defer triedb.Close() tracer, err := tracers.DefaultDirectory.New(tracerName, new(tracers.Context), test.TracerConfig) if err != nil { t.Fatalf("failed to create call tracer: %v", err) @@ -265,7 +266,8 @@ func benchTracer2(tracerName string, test *callTracer2Test, b *testing.B) { Difficulty: (*big.Int)(test.Context.Difficulty), GasLimit: uint64(test.Context.GasLimit), } - _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false) + triedb, _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false, rawdb.HashScheme) + defer triedb.Close() b.ReportAllocs() b.ResetTimer() @@ -335,7 +337,8 @@ func TestZeroValueToNotExitCall2(t *testing.T) { Balance: big.NewInt(500000000000000), }, } - _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), alloc, false) + triedb, _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), alloc, false, rawdb.HashScheme) + defer triedb.Close() // Create the tracer, the EVM environment and run it tracer, err := tracers.DefaultDirectory.New("callTracer2", new(tracers.Context), nil) if err != nil { diff --git a/eth/tracers/internal/tracetest/calltrace_test.go b/eth/tracers/internal/tracetest/calltrace_test.go index 1b3c121a89..233c1eb8e7 100644 --- a/eth/tracers/internal/tracetest/calltrace_test.go +++ b/eth/tracers/internal/tracetest/calltrace_test.go @@ -137,8 +137,9 @@ func testCallTracer(tracerName string, dirPath string, t *testing.T) { GasLimit: uint64(test.Context.GasLimit), BaseFee: test.Genesis.BaseFee, } - _, statedb = tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false) + triedb, _, statedb = tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false, rawdb.HashScheme) ) + triedb.Close() tracer, err := tracers.DefaultDirectory.New(tracerName, new(tracers.Context), test.TracerConfig) if err != nil { t.Fatalf("failed to create call tracer: %v", err) @@ -237,7 +238,8 @@ func benchTracer(tracerName string, test *callTracerTest, b *testing.B) { Difficulty: (*big.Int)(test.Context.Difficulty), GasLimit: uint64(test.Context.GasLimit), } - _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false) + triedb, _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false, rawdb.HashScheme) + defer triedb.Close() b.ReportAllocs() b.ResetTimer() @@ -343,7 +345,7 @@ func TestInternals(t *testing.T) { want: `{"0x0000000000000000000000000000000000000000":{"balance":"0x0"},"0x000000000000000000000000000000000000feed":{"balance":"0x1c6bf52640350"},"0x00000000000000000000000000000000deadbeef":{"balance":"0x0","code":"0x6001600052600164ffffffffff60016000f560ff6000a0"}}`, }, } { - _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), + triedb, _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), core.GenesisAlloc{ to: core.GenesisAccount{ Code: tc.code, @@ -351,7 +353,8 @@ func TestInternals(t *testing.T) { origin: core.GenesisAccount{ Balance: big.NewInt(500000000000000), }, - }, false) + }, false, rawdb.HashScheme) + defer triedb.Close() evm := vm.NewEVM(context, txContext, statedb, params.MainnetChainConfig, vm.Config{Tracer: tc.tracer}) msg := types.NewMessage(origin, &to, 0, big.NewInt(0), 50000, big.NewInt(0), big.NewInt(0), big.NewInt(0), nil, nil, false, nil, nil) st := core.NewStateTransition(evm, msg, new(core.GasPool).AddGas(msg.Gas())) diff --git a/eth/tracers/internal/tracetest/flat_calltrace_test.go b/eth/tracers/internal/tracetest/flat_calltrace_test.go index 10cf1ecdcd..f2e9d741ff 100644 --- a/eth/tracers/internal/tracetest/flat_calltrace_test.go +++ b/eth/tracers/internal/tracetest/flat_calltrace_test.go @@ -100,7 +100,8 @@ func flatCallTracerTestRunner(tracerName string, filename string, dirPath string Difficulty: (*big.Int)(test.Context.Difficulty), GasLimit: uint64(test.Context.GasLimit), } - _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false) + triedb, _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false, rawdb.HashScheme) + defer triedb.Close() // Create the tracer, the EVM environment and run it tracer, err := tracers.DefaultDirectory.New(tracerName, new(tracers.Context), test.TracerConfig) diff --git a/eth/tracers/internal/tracetest/prestate_test.go b/eth/tracers/internal/tracetest/prestate_test.go index db97c6e63d..579eb9fe71 100644 --- a/eth/tracers/internal/tracetest/prestate_test.go +++ b/eth/tracers/internal/tracetest/prestate_test.go @@ -108,8 +108,9 @@ func testPrestateDiffTracer(tracerName string, dirPath string, t *testing.T) { GasLimit: uint64(test.Context.GasLimit), BaseFee: test.Genesis.BaseFee, } - _, statedb = tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false) + triedb, _, statedb = tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false, rawdb.HashScheme) ) + defer triedb.Close() tracer, err := tracers.DefaultDirectory.New(tracerName, new(tracers.Context), test.TracerConfig) if err != nil { t.Fatalf("failed to create call tracer: %v", err) diff --git a/eth/tracers/tracers_test.go b/eth/tracers/tracers_test.go index 277cb471c6..f08f156612 100644 --- a/eth/tracers/tracers_test.go +++ b/eth/tracers/tracers_test.go @@ -79,7 +79,9 @@ func BenchmarkTransactionTrace(b *testing.B) { Code: []byte{}, Balance: big.NewInt(500000000000000), } - _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), alloc, false) + triedb, _, statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), alloc, false, rawdb.HashScheme) + defer triedb.Close() + // Create the tracer, the EVM environment and run it tracer := logger.NewStructLogger(&logger.Config{ Debug: false, diff --git a/ethclient/ethclient_test.go b/ethclient/ethclient_test.go index 4a24c47e7a..3c63cab01d 100644 --- a/ethclient/ethclient_test.go +++ b/ethclient/ethclient_test.go @@ -40,6 +40,7 @@ import ( "github.com/ethereum/go-ethereum/node" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rpc" + "github.com/ethereum/go-ethereum/trie" "github.com/holiman/uint256" ) @@ -287,7 +288,7 @@ func generateTestChain() ([]*types.Block, [][]*types.BlobTxSidecar, [][]common.H blobTxHashes = append(blobTxHashes, []common.Hash{}) } } - gblock := genesis.ToBlock(db) + gblock := genesis.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) engine := ethash.NewFaker() blocks, _ := core.GenerateChain(genesis.Config, gblock, engine, db, 2, generate, true) // add genesis blob/sidecars/txhash to the begining of the list @@ -724,7 +725,7 @@ func testGetBlobSidecars(t *testing.T, chain []*types.Block, blobSidecars [][]*t wantErr error }{ "first_block_blob_notfound_by_number": { - blkNum: chain[1].Number(), + blkNum: chain[1].Number(), }, "first_block_blob_notfound_by_hash": { blkHash: chain[1].Hash(), diff --git a/ethclient/gethclient/gethclient_test.go b/ethclient/gethclient/gethclient_test.go index 01837c1020..2a4954cfb1 100644 --- a/ethclient/gethclient/gethclient_test.go +++ b/ethclient/gethclient/gethclient_test.go @@ -36,6 +36,7 @@ import ( "github.com/ethereum/go-ethereum/node" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rpc" + "github.com/ethereum/go-ethereum/trie" ) var ( @@ -82,7 +83,7 @@ func generateTestChain() (*core.Genesis, []*types.Block) { g.OffsetTime(5) g.SetExtra([]byte("test")) } - gblock := genesis.ToBlock(db) + gblock := genesis.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) engine := ethash.NewFaker() blocks, _ := core.GenerateChain(config, gblock, engine, db, 1, generate, true) blocks = append([]*types.Block{gblock}, blocks...) diff --git a/ethdb/database.go b/ethdb/database.go index 1057636762..52e9f30328 100644 --- a/ethdb/database.go +++ b/ethdb/database.go @@ -38,7 +38,7 @@ type KeyValueWriter interface { } // Stater wraps the Stat method of a backing data store. -type Stater interface { +type KeyValueStater interface { // Stat returns a particular internal stat of the database. Stat(property string) (string, error) } @@ -60,16 +60,16 @@ type Compacter interface { type KeyValueStore interface { KeyValueReader KeyValueWriter + KeyValueStater Batcher Iteratee - Stater Compacter Snapshotter io.Closer } // AncientReader contains the methods required to read from immutable ancient data. -type AncientReader interface { +type AncientReaderOp interface { // HasAncient returns an indicator whether the specified data exists in the // ancient store. HasAncient(kind string, number uint64) (bool, error) @@ -79,9 +79,10 @@ type AncientReader interface { // AncientRange retrieves multiple items in sequence, starting from the index 'start'. // It will return - // - at most 'count' items, - // - at least 1 item (even if exceeding the maxBytes), but will otherwise - // return as many items as fit into maxBytes. + // - at most 'count' items, + // - if maxBytes is specified: at least 1 item (even if exceeding the maxByteSize), + // but will otherwise return as many items as fit into maxByteSize. + // - if maxBytes is not specified, 'count' items will be returned if they are present AncientRange(kind string, start, count, maxBytes uint64) ([][]byte, error) // Ancients returns the ancient item numbers in the ancient store. @@ -89,15 +90,19 @@ type AncientReader interface { // AncientSize returns the ancient size of the specified category. AncientSize(kind string) (uint64, error) + + // Tail returns the number of first stored item in the freezer + // This number can also be interpreted as the total deleted item numbers (counting from 0) + Tail() (uint64, error) } -// AncientBatchReader is the interface for 'batched' or 'atomic' reading. -type AncientBatchReader interface { - AncientReader +// AncientReader is the extended ancient reader interface including 'batched' or 'atomic' reading. +type AncientReader interface { + AncientReaderOp // ReadAncients runs the given read operation while ensuring that no writes take place // on the underlying freezer. - ReadAncients(fn func(AncientReader) error) (err error) + ReadAncients(fn func(AncientReaderOp) error) (err error) } // AncientWriter contains the methods required to write to immutable ancient data. @@ -107,8 +112,22 @@ type AncientWriter interface { // The integer return value is the total size of the written data. ModifyAncients(func(AncientWriteOp) error) (int64, error) - // TruncateAncients discards all but the first n ancient data from the ancient store. - TruncateAncients(n uint64) error + /* + Tail ------------> Head + */ + + // TruncateHead discards all, but keep the first n ancient data from the ancient store. + // After the truncation, the latest item can be accessed it item_ n-1 (start from 0) + // Tail 0 -> (n-1)New-headxxxxOld-head + TruncateHead(n uint64) (uint64, error) + + // TruncateTail discards the first n ancient data from the ancient store. The already + // deleted items are ignored. After the truncation, the earliest item can be accessed + // is item_n(start from 0). The deleted items may not be removed from the ancient store + // immediately, but only when the accumulated deleted data reach the threshold then + // will be removed all together. + // Old-tail(0)xxxxxxxNew-tail(n)->Head + TruncateTail(n uint64) (uint64, error) // Sync flushes all in-memory ancient store data to disk. Sync() error @@ -123,11 +142,17 @@ type AncientWriteOp interface { AppendRaw(kind string, number uint64, item []byte) error } +// AncientStater wraps the Stat method of a backing data store. +type AncientStater interface { + // AncientDatadir returns the root directory path of the ancient store. + AncientDatadir() (string, error) +} + // Reader contains the methods required to read data from both key-value as well as // immutable ancient data. type Reader interface { KeyValueReader - AncientBatchReader + AncientReader } // Writer contains the methods required to write data to both key-value as well as @@ -140,11 +165,18 @@ type Writer interface { // AncientStore contains all the methods required to allow handling different // ancient data stores backing immutable chain data store. type AncientStore interface { - AncientBatchReader + AncientReader AncientWriter io.Closer } +// Stater contains the methods required to retrieve states from both key-value as well as +// immutable ancient data. +type Stater interface { + KeyValueStater + AncientStater +} + // Database contains all the methods required by the high level database to not // only access the key-value data store but also the chain freezer. type Database interface { diff --git a/ethdb/dbtest/testsuite.go b/ethdb/dbtest/testsuite.go index a2b7003c27..30cef82bec 100644 --- a/ethdb/dbtest/testsuite.go +++ b/ethdb/dbtest/testsuite.go @@ -272,9 +272,13 @@ func TestDatabaseSuite(t *testing.T, New func() ethdb.KeyValueStore) { b.Put([]byte("5"), nil) b.Delete([]byte("1")) b.Put([]byte("6"), nil) - b.Delete([]byte("3")) + + b.Delete([]byte("3")) // delete then put b.Put([]byte("3"), nil) + b.Put([]byte("7"), nil) // put then delete + b.Delete([]byte("7")) + if err := b.Write(); err != nil { t.Fatal(err) } diff --git a/graphql/graphql_test.go b/graphql/graphql_test.go index 895e797535..767a37ae6f 100644 --- a/graphql/graphql_test.go +++ b/graphql/graphql_test.go @@ -245,13 +245,11 @@ func createGQLService(t *testing.T, stack *node.Node) { Ethash: ethash.Config{ PowMode: ethash.ModeFake, }, - NetworkId: 1337, - TrieCleanCache: 5, - TrieCleanCacheJournal: "triecache", - TrieCleanCacheRejournal: 60 * time.Minute, - TrieDirtyCache: 5, - TrieTimeout: 60 * time.Minute, - SnapshotCache: 5, + NetworkId: 1337, + TrieCleanCache: 5, + TrieDirtyCache: 5, + TrieTimeout: 60 * time.Minute, + SnapshotCache: 5, } ethBackend, err := eth.New(stack, ethConf) if err != nil { diff --git a/internal/ethapi/api_test.go b/internal/ethapi/api_test.go index e4226fb304..dfbda7b515 100644 --- a/internal/ethapi/api_test.go +++ b/internal/ethapi/api_test.go @@ -428,7 +428,7 @@ func newTestBackend(t *testing.T, n int, gspec *core.Genesis, engine consensus.E // Generate blocks for testing db, blocks, _ := core.GenerateChainWithGenesis(gspec, engine, n, generator) txlookupLimit := uint64(0) - chain, err := core.NewBlockChain(db, cacheConfig, gspec.Config, engine, vm.Config{}, nil, &txlookupLimit) + chain, err := core.NewBlockChain(db, cacheConfig, gspec, nil, engine, vm.Config{}, nil, &txlookupLimit) if err != nil { t.Fatalf("failed to create tester chain: %v", err) } diff --git a/internal/testrand/rand.go b/internal/testrand/rand.go new file mode 100644 index 0000000000..690993de05 --- /dev/null +++ b/internal/testrand/rand.go @@ -0,0 +1,53 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package testrand + +import ( + crand "crypto/rand" + "encoding/binary" + mrand "math/rand" + + "github.com/ethereum/go-ethereum/common" +) + +// prng is a pseudo random number generator seeded by strong randomness. +// The randomness is printed on startup in order to make failures reproducible. +var prng = initRand() + +func initRand() *mrand.Rand { + var seed [8]byte + crand.Read(seed[:]) + rnd := mrand.New(mrand.NewSource(int64(binary.LittleEndian.Uint64(seed[:])))) + return rnd +} + +// Bytes generates a random byte slice with specified length. +func Bytes(n int) []byte { + r := make([]byte, n) + prng.Read(r) + return r +} + +// Hash generates a random hash. +func Hash() common.Hash { + return common.BytesToHash(Bytes(common.HashLength)) +} + +// Address generates a random address. +func Address() common.Address { + return common.BytesToAddress(Bytes(common.AddressLength)) +} diff --git a/les/client.go b/les/client.go index 46daa0eb03..3ea07911f5 100644 --- a/les/client.go +++ b/les/client.go @@ -47,6 +47,7 @@ import ( "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/rpc" + "github.com/ethereum/go-ethereum/trie" ) type LightEthereum struct { @@ -88,7 +89,7 @@ func New(stack *node.Node, config *ethconfig.Config) (*LightEthereum, error) { if err != nil { return nil, err } - chainConfig, genesisHash, genesisErr := core.SetupGenesisBlockWithOverride(chainDb, config.Genesis, config.OverrideArrowGlacier, false) + chainConfig, genesisHash, genesisErr := core.SetupGenesisBlockWithOverride(chainDb, trie.NewDatabase(chainDb, nil), config.Genesis, config.OverrideArrowGlacier, false) if _, isCompat := genesisErr.(*params.ConfigCompatError); genesisErr != nil && !isCompat { return nil, genesisErr } diff --git a/les/downloader/downloader.go b/les/downloader/downloader.go index e7dfc4158e..ab637529ef 100644 --- a/les/downloader/downloader.go +++ b/les/downloader/downloader.go @@ -229,7 +229,7 @@ func New(checkpoint uint64, stateDb ethdb.Database, stateBloom *trie.SyncBloom, headerProcCh: make(chan []*types.Header, 1), quitCh: make(chan struct{}), stateCh: make(chan dataPack), - SnapSyncer: snap.NewSyncer(stateDb), + SnapSyncer: snap.NewSyncer(stateDb, ""), stateSyncStart: make(chan *stateSync), syncStatsState: stateSyncStats{ processed: rawdb.ReadFastTrieProgress(stateDb), @@ -705,9 +705,11 @@ func (d *Downloader) fetchHead(p *peerConnection) (head *types.Header, pivot *ty // calculateRequestSpan calculates what headers to request from a peer when trying to determine the // common ancestor. // It returns parameters to be used for peer.RequestHeadersByNumber: -// from - starting block number -// count - number of headers to request -// skip - number of headers to skip +// +// from - starting block number +// count - number of headers to request +// skip - number of headers to skip +// // and also returns 'max', the last block which is expected to be returned by the remote peers, // given the (from,count,skip) func calculateRequestSpan(remoteHeight, localHeight uint64) (int64, int, int, uint64) { @@ -1322,22 +1324,22 @@ func (d *Downloader) fetchReceipts(from uint64) error { // various callbacks to handle the slight differences between processing them. // // The instrumentation parameters: -// - errCancel: error type to return if the fetch operation is cancelled (mostly makes logging nicer) -// - deliveryCh: channel from which to retrieve downloaded data packets (merged from all concurrent peers) -// - deliver: processing callback to deliver data packets into type specific download queues (usually within `queue`) -// - wakeCh: notification channel for waking the fetcher when new tasks are available (or sync completed) -// - expire: task callback method to abort requests that took too long and return the faulty peers (traffic shaping) -// - pending: task callback for the number of requests still needing download (detect completion/non-completability) -// - inFlight: task callback for the number of in-progress requests (wait for all active downloads to finish) -// - throttle: task callback to check if the processing queue is full and activate throttling (bound memory use) -// - reserve: task callback to reserve new download tasks to a particular peer (also signals partial completions) -// - fetchHook: tester callback to notify of new tasks being initiated (allows testing the scheduling logic) -// - fetch: network callback to actually send a particular download request to a physical remote peer -// - cancel: task callback to abort an in-flight download request and allow rescheduling it (in case of lost peer) -// - capacity: network callback to retrieve the estimated type-specific bandwidth capacity of a peer (traffic shaping) -// - idle: network callback to retrieve the currently (type specific) idle peers that can be assigned tasks -// - setIdle: network callback to set a peer back to idle and update its estimated capacity (traffic shaping) -// - kind: textual label of the type being downloaded to display in log messages +// - errCancel: error type to return if the fetch operation is cancelled (mostly makes logging nicer) +// - deliveryCh: channel from which to retrieve downloaded data packets (merged from all concurrent peers) +// - deliver: processing callback to deliver data packets into type specific download queues (usually within `queue`) +// - wakeCh: notification channel for waking the fetcher when new tasks are available (or sync completed) +// - expire: task callback method to abort requests that took too long and return the faulty peers (traffic shaping) +// - pending: task callback for the number of requests still needing download (detect completion/non-completability) +// - inFlight: task callback for the number of in-progress requests (wait for all active downloads to finish) +// - throttle: task callback to check if the processing queue is full and activate throttling (bound memory use) +// - reserve: task callback to reserve new download tasks to a particular peer (also signals partial completions) +// - fetchHook: tester callback to notify of new tasks being initiated (allows testing the scheduling logic) +// - fetch: network callback to actually send a particular download request to a physical remote peer +// - cancel: task callback to abort an in-flight download request and allow rescheduling it (in case of lost peer) +// - capacity: network callback to retrieve the estimated type-specific bandwidth capacity of a peer (traffic shaping) +// - idle: network callback to retrieve the currently (type specific) idle peers that can be assigned tasks +// - setIdle: network callback to set a peer back to idle and update its estimated capacity (traffic shaping) +// - kind: textual label of the type being downloaded to display in log messages func (d *Downloader) fetchParts(deliveryCh chan dataPack, deliver func(dataPack) (int, error), wakeCh chan bool, expire func() map[string]int, pending func() int, inFlight func() bool, reserve func(*peerConnection, int) (*fetchRequest, bool, bool), fetchHook func([]*types.Header), fetch func(*peerConnection, *fetchRequest) error, cancel func(*fetchRequest), capacity func(*peerConnection) int, diff --git a/les/downloader/downloader_test.go b/les/downloader/downloader_test.go index 17cd3630c9..fe686ab407 100644 --- a/les/downloader/downloader_test.go +++ b/les/downloader/downloader_test.go @@ -229,7 +229,7 @@ func (dl *downloadTester) CurrentFastBlock() *types.Block { func (dl *downloadTester) FastSyncCommitHead(hash common.Hash) error { // For now only check that the state trie is correct if block := dl.GetBlockByHash(hash); block != nil { - _, err := trie.NewSecure(block.Root(), trie.NewDatabase(dl.stateDb)) + _, err := trie.NewSecure(trie.StateTrieID(block.Root()), trie.NewDatabase(dl.stateDb, nil)) return err } return fmt.Errorf("non existent block: %x", hash[:4]) diff --git a/les/downloader/queue_test.go b/les/downloader/queue_test.go index f78a914b83..75a3c80116 100644 --- a/les/downloader/queue_test.go +++ b/les/downloader/queue_test.go @@ -27,23 +27,17 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/consensus/ethash" "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/params" ) -var ( - testdb = rawdb.NewMemoryDatabase() - genesis = core.GenesisBlockForTesting(testdb, testAddress, big.NewInt(1000000000000000)) -) - // makeChain creates a chain of n blocks starting at and including parent. // the returned hash chain is ordered head->parent. In addition, every 3rd block // contains a transaction and every 5th an uncle to allow testing correct block // reassembly. func makeChain(n int, seed byte, parent *types.Block, empty bool) ([]*types.Block, []types.Receipts) { - blocks, receipts := core.GenerateChain(params.TestChainConfig, parent, ethash.NewFaker(), testdb, n, func(i int, block *core.BlockGen) { + blocks, receipts := core.GenerateChain(params.TestChainConfig, parent, ethash.NewFaker(), testDB, n, func(i int, block *core.BlockGen) { block.SetCoinbase(common.Address{seed}) // Add one tx to every secondblock if !empty && i%2 == 0 { @@ -69,10 +63,10 @@ var emptyChain *chainData func init() { // Create a chain of blocks to import targetBlocks := 128 - blocks, _ := makeChain(targetBlocks, 0, genesis, false) + blocks, _ := makeChain(targetBlocks, 0, testGenesis, false) chain = &chainData{blocks, 0} - blocks, _ = makeChain(targetBlocks, 0, genesis, true) + blocks, _ = makeChain(targetBlocks, 0, testGenesis, true) emptyChain = &chainData{blocks, 0} } @@ -261,7 +255,7 @@ func TestEmptyBlocks(t *testing.T) { // some more advanced scenarios func XTestDelivery(t *testing.T) { // the outside network, holding blocks - blo, rec := makeChain(128, 0, genesis, false) + blo, rec := makeChain(128, 0, testGenesis, false) world := newNetwork() world.receipts = rec world.chain = blo diff --git a/les/downloader/statesync.go b/les/downloader/statesync.go index 6c53e5577a..c50ed367b0 100644 --- a/les/downloader/statesync.go +++ b/les/downloader/statesync.go @@ -35,7 +35,7 @@ import ( // a single data retrieval network packet. type stateReq struct { nItems uint16 // Number of items requested for download (max is 384, so uint16 is sufficient) - trieTasks map[common.Hash]*trieTask // Trie node download tasks to track previous attempts + trieTasks map[string]*trieTask // Trie node download tasks to track previous attempts codeTasks map[common.Hash]*codeTask // Byte code download tasks to track previous attempts timeout time.Duration // Maximum round trip time for this to complete timer *time.Timer // Timer to fire when the RTT timeout expires @@ -264,7 +264,7 @@ type stateSync struct { sched *trie.Sync // State trie sync scheduler defining the tasks keccak crypto.KeccakState // Keccak256 hasher to verify deliveries with - trieTasks map[common.Hash]*trieTask // Set of trie node tasks currently queued for retrieval + trieTasks map[string]*trieTask // Set of trie node tasks currently queued for retrieval codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval numUncommitted int @@ -282,6 +282,7 @@ type stateSync struct { // trieTask represents a single trie node download task, containing a set of // peers already attempted retrieval from to detect stalled syncs and abort. type trieTask struct { + hash common.Hash path [][]byte attempts map[string]struct{} } @@ -295,12 +296,15 @@ type codeTask struct { // newStateSync creates a new state trie download scheduler. This method does not // yet start the sync. The user needs to call run to initiate. func newStateSync(d *Downloader, root common.Hash) *stateSync { + // Hack the node scheme here. It's a dead code is not used + // by light client at all. Just aim for passing tests. + scheme := trie.NewDatabase(rawdb.NewMemoryDatabase(), nil).Scheme() return &stateSync{ d: d, root: root, - sched: state.NewStateSync(root, d.stateDB, d.stateBloom, nil), + sched: state.NewStateSync(root, d.stateDB, d.stateBloom, nil, scheme), keccak: sha3.NewLegacyKeccak256().(crypto.KeccakState), - trieTasks: make(map[common.Hash]*trieTask), + trieTasks: make(map[string]*trieTask), codeTasks: make(map[common.Hash]*codeTask), deliver: make(chan *stateReq), cancel: make(chan struct{}), @@ -456,10 +460,11 @@ func (s *stateSync) assignTasks() { func (s *stateSync) fillTasks(n int, req *stateReq) (nodes []common.Hash, paths []trie.SyncPath, codes []common.Hash) { // Refill available tasks from the scheduler. if fill := n - (len(s.trieTasks) + len(s.codeTasks)); fill > 0 { - nodes, paths, codes := s.sched.Missing(fill) - for i, hash := range nodes { - s.trieTasks[hash] = &trieTask{ - path: paths[i], + paths, hashes, codes := s.sched.Missing(fill) + for i, path := range paths { + s.trieTasks[path] = &trieTask{ + hash: hashes[i], + path: trie.NewSyncPath([]byte(path)), attempts: make(map[string]struct{}), } } @@ -475,7 +480,7 @@ func (s *stateSync) fillTasks(n int, req *stateReq) (nodes []common.Hash, paths paths = make([]trie.SyncPath, 0, n) codes = make([]common.Hash, 0, n) - req.trieTasks = make(map[common.Hash]*trieTask, n) + req.trieTasks = make(map[string]*trieTask, n) req.codeTasks = make(map[common.Hash]*codeTask, n) for hash, t := range s.codeTasks { @@ -493,7 +498,7 @@ func (s *stateSync) fillTasks(n int, req *stateReq) (nodes []common.Hash, paths req.codeTasks[hash] = t delete(s.codeTasks, hash) } - for hash, t := range s.trieTasks { + for path, t := range s.trieTasks { // Stop when we've gathered enough requests if len(nodes)+len(codes) == n { break @@ -505,11 +510,11 @@ func (s *stateSync) fillTasks(n int, req *stateReq) (nodes []common.Hash, paths // Assign the request to this peer t.attempts[req.peer.id] = struct{}{} - nodes = append(nodes, hash) + nodes = append(nodes, t.hash) paths = append(paths, t.path) - req.trieTasks[hash] = t - delete(s.trieTasks, hash) + req.trieTasks[path] = t + delete(s.trieTasks, path) } req.nItems = uint16(len(nodes) + len(codes)) return nodes, paths, codes @@ -531,7 +536,7 @@ func (s *stateSync) process(req *stateReq) (int, error) { // Iterate over all the delivered data and inject one-by-one into the trie for _, blob := range req.response { - hash, err := s.processNodeData(blob) + hash, err := s.processNodeData(req.trieTasks, req.codeTasks, blob) switch err { case nil: s.numUncommitted++ @@ -544,13 +549,10 @@ func (s *stateSync) process(req *stateReq) (int, error) { default: return successful, fmt.Errorf("invalid state node %s: %v", hash.TerminalString(), err) } - // Delete from both queues (one delivery is enough for the syncer) - delete(req.trieTasks, hash) - delete(req.codeTasks, hash) } // Put unfulfilled tasks back into the retry queue npeers := s.d.peers.Len() - for hash, task := range req.trieTasks { + for path, task := range req.trieTasks { // If the node did deliver something, missing items may be due to a protocol // limit or a previous timeout + delayed delivery. Both cases should permit // the node to retry the missing items (to avoid single-peer stalls). @@ -560,10 +562,10 @@ func (s *stateSync) process(req *stateReq) (int, error) { // If we've requested the node too many times already, it may be a malicious // sync where nobody has the right data. Abort. if len(task.attempts) >= npeers { - return successful, fmt.Errorf("trie node %s failed with all peers (%d tries, %d peers)", hash.TerminalString(), len(task.attempts), npeers) + return successful, fmt.Errorf("trie node %s failed with all peers (%d tries, %d peers)", task.hash.TerminalString(), len(task.attempts), npeers) } // Missing item, place into the retry queue. - s.trieTasks[hash] = task + s.trieTasks[path] = task } for hash, task := range req.codeTasks { // If the node did deliver something, missing items may be due to a protocol @@ -586,13 +588,35 @@ func (s *stateSync) process(req *stateReq) (int, error) { // processNodeData tries to inject a trie node data blob delivered from a remote // peer into the state trie, returning whether anything useful was written or any // error occurred. -func (s *stateSync) processNodeData(blob []byte) (common.Hash, error) { - res := trie.SyncResult{Data: blob} +// +// If multiple requests correspond to the same hash, this method will inject the +// blob as a result for the first one only, leaving the remaining duplicates to +// be fetched again. +func (s *stateSync) processNodeData(nodeTasks map[string]*trieTask, codeTasks map[common.Hash]*codeTask, blob []byte) (common.Hash, error) { + var hash common.Hash s.keccak.Reset() s.keccak.Write(blob) - s.keccak.Read(res.Hash[:]) - err := s.sched.Process(res) - return res.Hash, err + s.keccak.Read(hash[:]) + + if _, present := codeTasks[hash]; present { + err := s.sched.ProcessCode(trie.CodeSyncResult{ + Hash: hash, + Data: blob, + }) + delete(codeTasks, hash) + return hash, err + } + for path, task := range nodeTasks { + if task.hash == hash { + err := s.sched.ProcessNode(trie.NodeSyncResult{ + Path: path, + Data: blob, + }) + delete(nodeTasks, path) + return hash, err + } + } + return common.Hash{}, trie.ErrNotRequested } // updateStats bumps the various state sync progress counters and displays a log diff --git a/les/downloader/testchain_test.go b/les/downloader/testchain_test.go index 485bbdc54a..41cb789817 100644 --- a/les/downloader/testchain_test.go +++ b/les/downloader/testchain_test.go @@ -28,6 +28,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) // Test chain parameters. @@ -35,7 +36,12 @@ var ( testKey, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") testAddress = crypto.PubkeyToAddress(testKey.PublicKey) testDB = rawdb.NewMemoryDatabase() - testGenesis = core.GenesisBlockForTesting(testDB, testAddress, big.NewInt(1000000000000000)) + + gspec = core.Genesis{ + Alloc: core.GenesisAlloc{testAddress: {Balance: big.NewInt(1000000000000000)}}, + BaseFee: big.NewInt(params.InitialBaseFee), + } + testGenesis = gspec.MustCommit(testDB, trie.NewDatabase(testDB, nil)) ) // The common prefix of all test chains: diff --git a/les/fetcher/block_fetcher_test.go b/les/fetcher/block_fetcher_test.go index bb3bfe42f8..6a64d3933b 100644 --- a/les/fetcher/block_fetcher_test.go +++ b/les/fetcher/block_fetcher_test.go @@ -35,10 +35,15 @@ import ( ) var ( - testdb = rawdb.NewMemoryDatabase() - testKey, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") - testAddress = crypto.PubkeyToAddress(testKey.PublicKey) - genesis = core.GenesisBlockForTesting(testdb, testAddress, big.NewInt(1000000000000000)) + testdb = rawdb.NewMemoryDatabase() + testKey, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") + testAddress = crypto.PubkeyToAddress(testKey.PublicKey) + + gspec = core.Genesis{ + Alloc: core.GenesisAlloc{testAddress: {Balance: big.NewInt(1000000000000000)}}, + BaseFee: big.NewInt(params.InitialBaseFee), + } + genesis = gspec.MustCommit(testdb, trie.NewDatabase(testdb, nil)) unknownBlock = types.NewBlock(&types.Header{GasLimit: params.GenesisGasLimit, BaseFee: big.NewInt(params.InitialBaseFee)}, nil, nil, nil, trie.NewStackTrie(nil)) ) diff --git a/les/handler_test.go b/les/handler_test.go index 1e26c8d795..61500d99f9 100644 --- a/les/handler_test.go +++ b/les/handler_test.go @@ -406,7 +406,7 @@ func testGetProofs(t *testing.T, protocol int) { accounts := []common.Address{bankAddr, userAddr1, userAddr2, signerAddr, {}} for i := uint64(0); i <= bc.CurrentBlock().NumberU64(); i++ { header := bc.GetHeaderByNumber(i) - trie, _ := trie.New(header.Root, trie.NewDatabase(server.db)) + trie, _ := trie.New(trie.StateTrieID(header.Root), server.backend.Blockchain().TrieDB()) for _, acc := range accounts { req := ProofReq{ @@ -457,7 +457,7 @@ func testGetStaleProof(t *testing.T, protocol int) { var expected []rlp.RawValue if wantOK { proofsV2 := light.NewNodeSet() - t, _ := trie.New(header.Root, trie.NewDatabase(server.db)) + t, _ := trie.New(trie.StateTrieID(header.Root), server.backend.Blockchain().TrieDB()) t.Prove(account, 0, proofsV2) expected = proofsV2.NodeList() } @@ -513,7 +513,7 @@ func testGetCHTProofs(t *testing.T, protocol int) { AuxData: [][]byte{rlp}, } root := light.GetChtRoot(server.db, 0, bc.GetHeaderByNumber(config.ChtSize-1).Hash()) - trie, _ := trie.New(root, trie.NewDatabase(rawdb.NewTable(server.db, light.ChtTablePrefix))) + trie, _ := trie.New(trie.StateTrieID(root), trie.NewDatabase(rawdb.NewTable(server.db, light.ChtTablePrefix), nil)) trie.Prove(key, 0, &proofsV2.Proofs) // Assemble the requests for the different protocols requestsV2 := []HelperTrieReq{{ @@ -578,7 +578,7 @@ func testGetBloombitsProofs(t *testing.T, protocol int) { var proofs HelperTrieResps root := light.GetBloomTrieRoot(server.db, 0, bc.GetHeaderByNumber(config.BloomTrieSize-1).Hash()) - trie, _ := trie.New(root, trie.NewDatabase(rawdb.NewTable(server.db, light.BloomTrieTablePrefix))) + trie, _ := trie.New(trie.StateTrieID(root), trie.NewDatabase(rawdb.NewTable(server.db, light.BloomTrieTablePrefix), nil)) trie.Prove(key, 0, &proofs.Proofs) // Send the proof request and verify the response diff --git a/les/peer_test.go b/les/peer_test.go index d6551ce6b6..b8a1482a04 100644 --- a/les/peer_test.go +++ b/les/peer_test.go @@ -28,7 +28,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/forkid" - "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/p2p" "github.com/ethereum/go-ethereum/p2p/enode" @@ -100,7 +99,7 @@ type fakeChain struct{} func (f *fakeChain) Config() *params.ChainConfig { return params.MainnetChainConfig } func (f *fakeChain) Genesis() *types.Block { - return core.DefaultGenesisBlock().ToBlock(rawdb.NewMemoryDatabase()) + return core.DefaultGenesisBlock().ToBlock() } func (f *fakeChain) CurrentHeader() *types.Header { return &types.Header{Number: big.NewInt(10000000)} } diff --git a/les/server_handler.go b/les/server_handler.go index 687409efaa..dc2b3d7527 100644 --- a/les/server_handler.go +++ b/les/server_handler.go @@ -360,7 +360,7 @@ func (h *serverHandler) AddTxsSync() bool { // getAccount retrieves an account from the state based on root. func getAccount(triedb *trie.Database, root, hash common.Hash) (types.StateAccount, error) { - trie, err := trie.New(root, triedb) + trie, err := trie.New(trie.StateTrieID(root), triedb) if err != nil { return types.StateAccount{}, err } @@ -392,7 +392,7 @@ func (h *serverHandler) GetHelperTrie(typ uint, index uint64) *trie.Trie { if root == (common.Hash{}) { return nil } - trie, _ := trie.New(root, trie.NewDatabase(rawdb.NewTable(h.chainDb, prefix))) + trie, _ := trie.New(trie.StateTrieID(root), trie.NewDatabase(rawdb.NewTable(h.chainDb, prefix), nil)) return trie } diff --git a/les/server_requests.go b/les/server_requests.go index b0b675b659..5b35115791 100644 --- a/les/server_requests.go +++ b/les/server_requests.go @@ -429,7 +429,7 @@ func handleGetProofs(msg Decoder) (serveRequestFn, uint64, uint64, error) { p.bumpInvalid() continue } - trie, err = statedb.OpenStorageTrie(common.BytesToHash(request.AccKey), account.Root) + trie, err = statedb.OpenStorageTrie(root, common.BytesToHash(request.AccKey), account.Root) if trie == nil || err != nil { p.Log().Warn("Failed to open storage trie for proof", "block", header.Number, "hash", header.Hash(), "account", common.BytesToHash(request.AccKey), "root", account.Root, "err", err) continue diff --git a/les/test_helper.go b/les/test_helper.go index bd967b112d..01dcf89132 100644 --- a/les/test_helper.go +++ b/les/test_helper.go @@ -52,6 +52,7 @@ import ( "github.com/ethereum/go-ethereum/p2p" "github.com/ethereum/go-ethereum/p2p/enode" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) var ( @@ -203,7 +204,7 @@ func newTestClientHandler(backend *backends.SimulatedBackend, odr *LesOdr, index } oracle *checkpointoracle.CheckpointOracle ) - genesis := gspec.MustCommit(db) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, trie.HashDefaults)) chain, _ := light.NewLightChain(odr, gspec.Config, engine, nil) if indexers != nil { checkpointConfig := ¶ms.CheckpointOracleConfig{ @@ -263,7 +264,7 @@ func newTestServerHandler(blocks int, indexers []*core.ChainIndexer, db ethdb.Da } oracle *checkpointoracle.CheckpointOracle ) - genesis := gspec.MustCommit(db) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) // create a simulation backend and pre-commit several customized block to the database. simulation := backends.NewSimulatedBackendWithDatabase(db, gspec.Alloc, 100000000) diff --git a/light/lightchain_test.go b/light/lightchain_test.go index e9d43d6da0..d9fb159119 100644 --- a/light/lightchain_test.go +++ b/light/lightchain_test.go @@ -29,6 +29,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) // So we can deterministically seed different blockchains @@ -55,7 +56,7 @@ func makeHeaderChain(parent *types.Header, n int, db ethdb.Database, seed int) [ func newCanonical(n int) (ethdb.Database, *LightChain, error) { db := rawdb.NewMemoryDatabase() gspec := core.Genesis{Config: params.TestChainConfig} - genesis := gspec.MustCommit(db) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) blockchain, _ := NewLightChain(&dummyOdr{db: db, indexerConfig: TestClientIndexerConfig}, gspec.Config, ethash.NewFaker(), nil) // Create and inject the requested chain @@ -75,7 +76,7 @@ func newTestLightChain() *LightChain { Difficulty: big.NewInt(1), Config: params.TestChainConfig, } - gspec.MustCommit(db) + gspec.MustCommit(db, trie.NewDatabase(db, nil)) lc, err := NewLightChain(&dummyOdr{db: db}, gspec.Config, ethash.NewFullFaker(), nil) if err != nil { panic(err) diff --git a/light/odr.go b/light/odr.go index 493f6fd7fc..f998dbe584 100644 --- a/light/odr.go +++ b/light/odr.go @@ -54,9 +54,11 @@ type OdrRequest interface { // TrieID identifies a state or account storage trie type TrieID struct { - BlockHash, Root common.Hash - BlockNumber uint64 - AccKey []byte + BlockHash common.Hash + BlockNumber uint64 + StateRoot common.Hash + Root common.Hash + AccKey []byte } // StateTrieID returns a TrieID for a state trie belonging to a certain block @@ -65,8 +67,9 @@ func StateTrieID(header *types.Header) *TrieID { return &TrieID{ BlockHash: header.Hash(), BlockNumber: header.Number.Uint64(), - AccKey: nil, + StateRoot: header.Root, Root: header.Root, + AccKey: nil, } } @@ -77,6 +80,7 @@ func StorageTrieID(state *TrieID, addrHash, root common.Hash) *TrieID { return &TrieID{ BlockHash: state.BlockHash, BlockNumber: state.BlockNumber, + StateRoot: state.StateRoot, AccKey: addrHash[:], Root: root, } diff --git a/light/odr_test.go b/light/odr_test.go index 0be7e6e4ef..6929696cc3 100644 --- a/light/odr_test.go +++ b/light/odr_test.go @@ -82,7 +82,7 @@ func (odr *testOdr) Retrieve(ctx context.Context, req OdrRequest) error { req.Receipts = rawdb.ReadRawReceipts(odr.sdb, req.Hash, *number) } case *TrieRequest: - t, _ := trie.New(req.Id.Root, trie.NewDatabase(odr.sdb)) + t, _ := trie.New(trie.StorageTrieID(req.Id.StateRoot, common.BytesToHash(req.Id.AccKey), req.Id.Root), trie.NewDatabase(odr.sdb, nil)) nodes := NewNodeSet() t.Prove(req.Key, 0, nodes) req.Proof = nodes @@ -254,14 +254,15 @@ func testChainOdr(t *testing.T, protocol int, fn odrTestFn) { sdb = rawdb.NewMemoryDatabase() ldb = rawdb.NewMemoryDatabase() gspec = core.Genesis{ + Config: params.TestChainConfig, Alloc: core.GenesisAlloc{testBankAddress: {Balance: testBankFunds}}, BaseFee: big.NewInt(params.InitialBaseFee), } - genesis = gspec.MustCommit(sdb) + genesis = gspec.MustCommit(sdb, trie.NewDatabase(sdb, nil)) ) - gspec.MustCommit(ldb) + gspec.MustCommit(ldb, trie.NewDatabase(ldb, nil)) // Assemble the test environment - blockchain, _ := core.NewBlockChain(sdb, nil, params.TestChainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + blockchain, _ := core.NewBlockChain(sdb, nil, &gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) gchain, _ := core.GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), sdb, 4, testChainGen, true) if _, err := blockchain.InsertChain(gchain, nil); err != nil { t.Fatal(err) diff --git a/light/postprocess.go b/light/postprocess.go index ce38d091e8..4dcc358129 100644 --- a/light/postprocess.go +++ b/light/postprocess.go @@ -36,6 +36,7 @@ import ( "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) // IndexerConfig includes a set of configs for chain indexers. @@ -138,6 +139,7 @@ type ChtIndexerBackend struct { section, sectionSize uint64 lastHash common.Hash trie *trie.Trie + originRoot common.Hash } // NewChtIndexer creates a Cht chain indexer @@ -147,7 +149,7 @@ func NewChtIndexer(db ethdb.Database, odr OdrBackend, size, confirms uint64, dis diskdb: db, odr: odr, trieTable: trieTable, - triedb: trie.NewDatabaseWithConfig(trieTable, &trie.Config{Cache: 1}), // Use a tiny cache only to keep memory down + triedb: trie.NewDatabase(trieTable, nil), // Use a tiny cache only to keep memory down trieset: mapset.NewSet(), sectionSize: size, disablePruning: disablePruning, @@ -187,15 +189,16 @@ func (c *ChtIndexerBackend) Reset(ctx context.Context, section uint64, lastSecti root = GetChtRoot(c.diskdb, section-1, lastSectionHead) } var err error - c.trie, err = trie.New(root, c.triedb) + c.trie, err = trie.New(trie.StateTrieID(root), c.triedb) if err != nil && c.odr != nil { err = c.fetchMissingNodes(ctx, section, root) if err == nil { - c.trie, err = trie.New(root, c.triedb) + c.trie, err = trie.New(trie.StateTrieID(root), c.triedb) } } c.section = section + c.originRoot = root return err } @@ -217,7 +220,18 @@ func (c *ChtIndexerBackend) Process(ctx context.Context, header *types.Header) e // Commit implements core.ChainIndexerBackend func (c *ChtIndexerBackend) Commit() error { - root, _, err := c.trie.Commit(nil) + root, nodes, err := c.trie.Commit(false) + if err != nil { + return err + } + // Commite trie changes into trie database in case it's not nil. + if nodes != nil { + if err := c.triedb.Update(root, c.originRoot, 0, trienode.NewWithNodeSet(nodes), nil); err != nil { + return err + } + } + // Re-create trie with nelwy generated root and updated database. + c.trie, err = trie.New(trie.StateTrieID(root), c.triedb) if err != nil { return err } @@ -225,7 +239,7 @@ func (c *ChtIndexerBackend) Commit() error { if !c.disablePruning { // Flush the triedb and track the latest trie nodes. c.trieset.Clear() - c.triedb.Commit(root, false, func(hash common.Hash) { c.trieset.Add(hash) }) + c.triedb.Commit(root, false) it := c.trieTable.NewIterator(nil, nil) defer it.Release() @@ -246,16 +260,15 @@ func (c *ChtIndexerBackend) Commit() error { } log.Debug("Prune historical CHT trie nodes", "deleted", deleted, "remaining", remaining, "elapsed", common.PrettyDuration(time.Since(t))) } else { - c.triedb.Commit(root, false, nil) + c.triedb.Commit(root, false) } log.Info("Storing CHT", "section", c.section, "head", fmt.Sprintf("%064x", c.lastHash), "root", fmt.Sprintf("%064x", root)) StoreChtRoot(c.diskdb, c.section, c.lastHash, root) return nil } -// PruneSections implements core.ChainIndexerBackend which deletes all -// chain data(except hash<->number mappings) older than the specified -// threshold. +// Prune implements core.ChainIndexerBackend which deletes all chain data +// (except hash<->number mappings) older than the specified threshold. func (c *ChtIndexerBackend) Prune(threshold uint64) error { // Short circuit if the light pruning is disabled. if c.disablePruning { @@ -331,6 +344,7 @@ type BloomTrieIndexerBackend struct { bloomTrieRatio uint64 trie *trie.Trie sectionHeads []common.Hash + originRoot common.Hash } // NewBloomTrieIndexer creates a BloomTrie chain indexer @@ -340,7 +354,7 @@ func NewBloomTrieIndexer(db ethdb.Database, odr OdrBackend, parentSize, size uin diskdb: db, odr: odr, trieTable: trieTable, - triedb: trie.NewDatabaseWithConfig(trieTable, &trie.Config{Cache: 1}), // Use a tiny cache only to keep memory down + triedb: trie.NewDatabase(trieTable, nil), // Use a tiny cache only to keep memory down trieset: mapset.NewSet(), parentSize: parentSize, size: size, @@ -404,11 +418,11 @@ func (b *BloomTrieIndexerBackend) Reset(ctx context.Context, section uint64, las root = GetBloomTrieRoot(b.diskdb, section-1, lastSectionHead) } var err error - b.trie, err = trie.New(root, b.triedb) + b.trie, err = trie.New(trie.StateTrieID(root), b.triedb) if err != nil && b.odr != nil { err = b.fetchMissingNodes(ctx, section, root) if err == nil { - b.trie, err = trie.New(root, b.triedb) + b.trie, err = trie.New(trie.StateTrieID(root), b.triedb) } } b.section = section @@ -454,7 +468,19 @@ func (b *BloomTrieIndexerBackend) Commit() error { b.trie.Delete(encKey[:]) } } - root, _, err := b.trie.Commit(nil) + root, nodes, err := b.trie.Commit(false) + if err != nil { + return err + } + + if nodes != nil { + if err := b.triedb.Update(root, b.originRoot, 0, trienode.NewWithNodeSet(nodes), nil); err != nil { + return err + } + } + + // Re-create trie with nelwy generated root and updated database. + b.trie, err = trie.New(trie.StateTrieID(root), b.triedb) if err != nil { return err } @@ -462,7 +488,7 @@ func (b *BloomTrieIndexerBackend) Commit() error { if !b.disablePruning { // Flush the triedb and track the latest trie nodes. b.trieset.Clear() - b.triedb.Commit(root, false, func(hash common.Hash) { b.trieset.Add(hash) }) + b.triedb.Commit(root, false) it := b.trieTable.NewIterator(nil, nil) defer it.Release() @@ -483,7 +509,7 @@ func (b *BloomTrieIndexerBackend) Commit() error { } log.Debug("Prune historical bloom trie nodes", "deleted", deleted, "remaining", remaining, "elapsed", common.PrettyDuration(time.Since(t))) } else { - b.triedb.Commit(root, false, nil) + b.triedb.Commit(root, false) } sectionHead := b.sectionHeads[b.bloomTrieRatio-1] StoreBloomTrieRoot(b.diskdb, b.section, sectionHead, root) diff --git a/light/trie.go b/light/trie.go index 4ab6f4ace0..b7c4f311bb 100644 --- a/light/trie.go +++ b/light/trie.go @@ -29,6 +29,7 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) var ( @@ -54,7 +55,7 @@ func (db *odrDatabase) OpenTrie(root common.Hash) (state.Trie, error) { return &odrTrie{db: db, id: db.id}, nil } -func (db *odrDatabase) OpenStorageTrie(addrHash, root common.Hash) (state.Trie, error) { +func (db *odrDatabase) OpenStorageTrie(stateRoot, addrHash, root common.Hash) (state.Trie, error) { return &odrTrie{db: db, id: StorageTrieID(db.id, addrHash, root)}, nil } @@ -63,8 +64,7 @@ func (db *odrDatabase) CopyTrie(t state.Trie) state.Trie { case *odrTrie: cpy := &odrTrie{db: t.db, id: t.id} if t.trie != nil { - cpytrie := *t.trie - cpy.trie = &cpytrie + cpy.trie = t.trie.Copy() } return cpy default: @@ -137,11 +137,11 @@ func (t *odrTrie) TryDelete(key []byte) error { }) } -func (t *odrTrie) Commit(onleaf trie.LeafCallback) (common.Hash, int, error) { +func (t *odrTrie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet, error) { if t.trie == nil { - return t.id.Root, 0, nil + return t.id.Root, nil, nil } - return t.trie.Commit(onleaf) + return t.trie.Commit(collectLeaf) } func (t *odrTrie) Hash() common.Hash { @@ -151,8 +151,8 @@ func (t *odrTrie) Hash() common.Hash { return t.trie.Hash() } -func (t *odrTrie) NodeIterator(startkey []byte) trie.NodeIterator { - return newNodeIterator(t, startkey) +func (t *odrTrie) NodeIterator(startkey []byte) (trie.NodeIterator, error) { + return newNodeIterator(t, startkey), nil } func (t *odrTrie) GetKey(sha []byte) []byte { @@ -169,7 +169,13 @@ func (t *odrTrie) do(key []byte, fn func() error) error { for { var err error if t.trie == nil { - t.trie, err = trie.New(t.id.Root, trie.NewDatabase(t.db.backend.Database())) + var id *trie.ID + if len(t.id.AccKey) > 0 { + id = trie.StorageTrieID(t.id.StateRoot, common.BytesToHash(t.id.AccKey), t.id.Root) + } else { + id = trie.StateTrieID(t.id.StateRoot) + } + t.trie, err = trie.New(id, trie.NewDatabase(t.db.backend.Database(), nil)) } if err == nil { err = fn() @@ -195,7 +201,13 @@ func newNodeIterator(t *odrTrie, startkey []byte) trie.NodeIterator { // Open the actual non-ODR trie if that hasn't happened yet. if t.trie == nil { it.do(func() error { - t, err := trie.New(t.id.Root, trie.NewDatabase(t.db.backend.Database())) + var id *trie.ID + if len(t.id.AccKey) > 0 { + id = trie.StorageTrieID(t.id.StateRoot, common.BytesToHash(t.id.AccKey), t.id.Root) + } else { + id = trie.StateTrieID(t.id.StateRoot) + } + t, err := trie.New(id, trie.NewDatabase(t.db.backend.Database(), nil)) if err == nil { it.t.trie = t } @@ -203,7 +215,11 @@ func newNodeIterator(t *odrTrie, startkey []byte) trie.NodeIterator { }) } it.do(func() error { - it.NodeIterator = it.t.trie.NodeIterator(startkey) + var err error + it.NodeIterator, err = it.t.trie.NodeIterator(startkey) + if err != nil { + return err + } return it.NodeIterator.Error() }) return it diff --git a/light/trie_test.go b/light/trie_test.go index c8ec1116fc..be05504f02 100644 --- a/light/trie_test.go +++ b/light/trie_test.go @@ -38,13 +38,14 @@ func TestNodeIterator(t *testing.T) { fulldb = rawdb.NewMemoryDatabase() lightdb = rawdb.NewMemoryDatabase() gspec = core.Genesis{ + Config: params.TestChainConfig, Alloc: core.GenesisAlloc{testBankAddress: {Balance: testBankFunds}}, BaseFee: big.NewInt(params.InitialBaseFee), } - genesis = gspec.MustCommit(fulldb) + genesis = gspec.MustCommit(fulldb, trie.NewDatabase(fulldb, nil)) ) - gspec.MustCommit(lightdb) - blockchain, _ := core.NewBlockChain(fulldb, nil, params.TestChainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + gspec.MustCommit(lightdb, trie.NewDatabase(lightdb, nil)) + blockchain, _ := core.NewBlockChain(fulldb, nil, &gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) gchain, _ := core.GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), fulldb, 4, testChainGen, true) if _, err := blockchain.InsertChain(gchain, nil); err != nil { panic(err) @@ -61,8 +62,16 @@ func TestNodeIterator(t *testing.T) { } func diffTries(t1, t2 state.Trie) error { - i1 := trie.NewIterator(t1.NodeIterator(nil)) - i2 := trie.NewIterator(t2.NodeIterator(nil)) + trieIt1, err := t1.NodeIterator(nil) + if err != nil { + return err + } + trieIt2, err := t2.NodeIterator(nil) + if err != nil { + return err + } + i1 := trie.NewIterator(trieIt1) + i2 := trie.NewIterator(trieIt2) for i1.Next() && i2.Next() { if !bytes.Equal(i1.Key, i2.Key) { spew.Dump(i2) diff --git a/light/txpool_test.go b/light/txpool_test.go index e47a649c65..8398742689 100644 --- a/light/txpool_test.go +++ b/light/txpool_test.go @@ -30,6 +30,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" ) type testTxRelay struct { @@ -84,14 +85,15 @@ func TestTxPool(t *testing.T) { sdb = rawdb.NewMemoryDatabase() ldb = rawdb.NewMemoryDatabase() gspec = core.Genesis{ + Config: params.TestChainConfig, Alloc: core.GenesisAlloc{testBankAddress: {Balance: testBankFunds}}, BaseFee: big.NewInt(params.InitialBaseFee), } - genesis = gspec.MustCommit(sdb) + genesis = gspec.MustCommit(sdb, trie.NewDatabase(sdb, nil)) ) - gspec.MustCommit(ldb) + gspec.MustCommit(ldb, trie.NewDatabase(ldb, nil)) // Assemble the test environment - blockchain, _ := core.NewBlockChain(sdb, nil, params.TestChainConfig, ethash.NewFullFaker(), vm.Config{}, nil, nil) + blockchain, _ := core.NewBlockChain(sdb, nil, &gspec, nil, ethash.NewFullFaker(), vm.Config{}, nil, nil) gchain, _ := core.GenerateChain(params.TestChainConfig, genesis, ethash.NewFaker(), sdb, poolTestBlocks, txPoolTestChainGen, true) if _, err := blockchain.InsertChain(gchain, nil); err != nil { panic(err) diff --git a/miner/miner_test.go b/miner/miner_test.go index 1bd0f816d5..dd2fec27ca 100644 --- a/miner/miner_test.go +++ b/miner/miner_test.go @@ -57,6 +57,7 @@ func (m *mockBackend) TxPool() *txpool.TxPool { } type testBlockChain struct { + root common.Hash statedb *state.StateDB gasLimit uint64 chainHeadFeed *event.Feed @@ -76,6 +77,10 @@ func (bc *testBlockChain) StateAt(common.Hash) (*state.StateDB, error) { return bc.statedb, nil } +func (bc *testBlockChain) HasState(root common.Hash) bool { + return bc.root == root +} + func (bc *testBlockChain) SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription { return bc.chainHeadFeed.Subscribe(ch) } @@ -240,21 +245,21 @@ func createMiner(t *testing.T) (*Miner, *event.TypeMux) { memdb := memorydb.New() chainDB := rawdb.NewDatabase(memdb) genesis := core.DeveloperGenesisBlock(15, 11_500_000, common.HexToAddress("12345")) - chainConfig, _, err := core.SetupGenesisBlock(chainDB, genesis, false) + loadedChainConfig, err := core.LoadChainConfig(chainDB, genesis) if err != nil { - t.Fatalf("can't create new chain config: %v", err) + t.Fatalf("can't load chain config: %v", err) } // Create consensus engine - engine := clique.New(chainConfig.Clique, chainDB) + engine := clique.New(loadedChainConfig.Clique, chainDB) // Create Ethereum backend - bc, err := core.NewBlockChain(chainDB, nil, chainConfig, engine, vm.Config{}, nil, nil) + bc, err := core.NewBlockChain(chainDB, nil, genesis, nil, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("can't create new chain %v", err) } - statedb, _ := state.New(common.Hash{}, state.NewDatabase(chainDB), nil) - blockchain := &testBlockChain{statedb, 10000000, new(event.Feed)} + statedb, _ := state.New(bc.Genesis().Root(), bc.StateCache(), nil) + blockchain := &testBlockChain{bc.Genesis().Root(), statedb, 10000000, new(event.Feed)} - legacyPool := legacypool.New(testTxPoolConfig, chainConfig, blockchain) + legacyPool := legacypool.New(testTxPoolConfig, bc.Config(), blockchain) txPool, err := txpool.New(testTxPoolConfig.PriceLimit, blockchain, []txpool.SubPool{legacyPool}) if err != nil { t.Fatal(err) @@ -263,5 +268,5 @@ func createMiner(t *testing.T) (*Miner, *event.TypeMux) { // Create event Mux mux := new(event.TypeMux) // Create Miner - return New(backend, &config, chainConfig, mux, engine, nil), mux + return New(backend, &config, bc.Config(), mux, engine, nil), mux } diff --git a/miner/worker_test.go b/miner/worker_test.go index 3a8b5a3848..11d2f51c4f 100644 --- a/miner/worker_test.go +++ b/miner/worker_test.go @@ -42,6 +42,7 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" "github.com/holiman/uint256" ) @@ -142,9 +143,9 @@ func newTestWorkerBackend(t *testing.T, chainConfig *params.ChainConfig, engine default: t.Fatalf("unexpected consensus engine type: %T", engine) } - genesis := gspec.MustCommit(db) + genesis := gspec.MustCommit(db, trie.NewDatabase(db, nil)) - chain, _ := core.NewBlockChain(db, &core.CacheConfig{TrieDirtyDisabled: true}, gspec.Config, engine, vm.Config{}, nil, nil) + chain, _ := core.NewBlockChain(db, &core.CacheConfig{TrieDirtyDisabled: true}, &gspec, nil, engine, vm.Config{}, nil, nil) legacyPool := legacypool.New(testTxPoolConfig, chainConfig, chain) txpool, err := txpool.New(testTxPoolConfig.PriceLimit, chain, []txpool.SubPool{legacyPool}) if err != nil { @@ -244,8 +245,8 @@ func testGenerateBlockAndImport(t *testing.T, isClique bool) { // This test chain imports the mined blocks. db2 := rawdb.NewMemoryDatabase() - b.genesis.MustCommit(db2) - chain, _ := core.NewBlockChain(db2, nil, b.chain.Config(), engine, vm.Config{}, nil, nil) + b.genesis.MustCommit(db2, trie.NewDatabase(db2, nil)) + chain, _ := core.NewBlockChain(db2, nil, b.genesis, nil, engine, vm.Config{}, nil, nil) defer chain.Stop() // Ignore empty commit here for less noise. diff --git a/node/node.go b/node/node.go index 2cf85519f6..b7000b339c 100644 --- a/node/node.go +++ b/node/node.go @@ -602,7 +602,7 @@ func (n *Node) OpenDatabase(name string, cache, handles int, namespace string, r // also attaching a chain freezer to it that moves ancient chain data from the // database to immutable append-only files. If the node is an ephemeral one, a // memory database is returned. -func (n *Node) OpenDatabaseWithFreezer(name string, cache, handles int, freezer, namespace string, readonly bool) (ethdb.Database, error) { +func (n *Node) OpenDatabaseWithFreezer(name string, cache, handles int, ancient string, namespace string, readonly bool) (ethdb.Database, error) { n.lock.Lock() defer n.lock.Unlock() if n.state == closedState { @@ -613,17 +613,10 @@ func (n *Node) OpenDatabaseWithFreezer(name string, cache, handles int, freezer, if n.config.DataDir == "" { db = rawdb.NewMemoryDatabase() } else { - root := n.ResolvePath(name) - switch { - case freezer == "": - freezer = filepath.Join(root, "ancient") - case !filepath.IsAbs(freezer): - freezer = n.ResolvePath(freezer) - } db, err = rawdb.Open(rawdb.OpenOptions{ Type: n.config.DBEngine, Directory: n.ResolvePath(name), - AncientsDirectory: freezer, + AncientsDirectory: n.ResolveAncient(name, ancient), Namespace: namespace, Cache: cache, Handles: handles, @@ -637,6 +630,17 @@ func (n *Node) OpenDatabaseWithFreezer(name string, cache, handles int, freezer, return db, err } +// ResolveAncient returns the absolute path of the root ancient directory. +func (n *Node) ResolveAncient(name string, ancient string) string { + switch { + case ancient == "": // Use the default ancient directory + ancient = filepath.Join(n.ResolvePath(name), "ancient") + case !filepath.IsAbs(ancient): // ancient is relative path to the instance directory + ancient = n.ResolvePath(ancient) + } + return ancient +} + // ResolvePath returns the absolute path of a resource in the instance directory. func (n *Node) ResolvePath(x string) string { return n.config.ResolvePath(x) diff --git a/params/config.go b/params/config.go index b60bd2ba70..dd64c50aca 100644 --- a/params/config.go +++ b/params/config.go @@ -484,6 +484,36 @@ var ( ConsortiumV2Contracts: nil, RoninTreasuryAddress: &common.Address{}, } + NonActivatedConfig = &ChainConfig{ + ChainID: nil, + HomesteadBlock: nil, + DAOForkBlock: nil, + DAOForkSupport: false, + EIP150Block: nil, + EIP150Hash: common.Hash{}, + EIP155Block: nil, + EIP158Block: nil, + ByzantiumBlock: nil, + ConstantinopleBlock: nil, + PetersburgBlock: nil, + IstanbulBlock: nil, + MuirGlacierBlock: nil, + MikoBlock: nil, + BerlinBlock: nil, + LondonBlock: nil, + ArrowGlacierBlock: nil, + OdysseusBlock: nil, + FenixBlock: nil, + ConsortiumV2Block: nil, + PuffyBlock: nil, + BlacklistContractAddress: nil, + FenixValidatorContractAddress: nil, + TerminalTotalDifficulty: nil, + Ethash: new(EthashConfig), + Clique: nil, + Consortium: nil, + ConsortiumV2Contracts: nil, + } TestRules = TestChainConfig.Rules(new(big.Int)) ) diff --git a/rlp/encbuffer.go b/rlp/encbuffer.go index 64dd4fd884..b4582f12e6 100644 --- a/rlp/encbuffer.go +++ b/rlp/encbuffer.go @@ -39,27 +39,31 @@ func (buf *encBuffer) size() int { return len(buf.str) + buf.lhsize } -// toBytes creates the encoder output. -func (w *encBuffer) toBytes() []byte { +// makeBytes creates the encoder output. +func (w *encBuffer) makeBytes() []byte { out := make([]byte, w.size()) + w.copyTo(out) + return out +} + +func (w *encBuffer) copyTo(dst []byte) { strpos := 0 pos := 0 for _, head := range w.lheads { // write string data before header - n := copy(out[pos:], w.str[strpos:head.offset]) + n := copy(dst[pos:], w.str[strpos:head.offset]) pos += n strpos += n // write the header - enc := head.encode(out[pos:]) + enc := head.encode(dst[pos:]) pos += len(enc) } // copy string data after the last list header - copy(out[pos:], w.str[strpos:]) - return out + copy(dst[pos:], w.str[strpos:]) } -// toWriter writes the encoder output to w. -func (buf *encBuffer) toWriter(w io.Writer) (err error) { +// writeTo writes the encoder output to w. +func (buf *encBuffer) writeTo(w io.Writer) (err error) { strpos := 0 for _, head := range buf.lheads { // write string data before header @@ -268,6 +272,19 @@ func (r *encReader) next() []byte { } } +func encBufferFromWriter(w io.Writer) *encBuffer { + switch w := w.(type) { + case EncoderBuffer: + return w.buf + case *EncoderBuffer: + return w.buf + case *encBuffer: + return w + default: + return nil + } +} + // EncoderBuffer is a buffer for incremental encoding. // // The zero value is NOT ready for use. To get a usable buffer, @@ -295,14 +312,10 @@ func (w *EncoderBuffer) Reset(dst io.Writer) { // If the destination writer has an *encBuffer, use it. // Note that w.ownBuffer is left false here. if dst != nil { - if outer, ok := dst.(*encBuffer); ok { + if outer := encBufferFromWriter(dst); outer != nil { *w = EncoderBuffer{outer, nil, false} return } - if outer, ok := dst.(EncoderBuffer); ok { - *w = EncoderBuffer{outer.buf, nil, false} - return - } } // Get a fresh buffer. @@ -319,7 +332,7 @@ func (w *EncoderBuffer) Reset(dst io.Writer) { func (w *EncoderBuffer) Flush() error { var err error if w.dst != nil { - err = w.buf.toWriter(w.dst) + err = w.buf.writeTo(w.dst) } // Release the internal buffer. if w.ownBuffer { @@ -331,7 +344,15 @@ func (w *EncoderBuffer) Flush() error { // ToBytes returns the encoded bytes. func (w *EncoderBuffer) ToBytes() []byte { - return w.buf.toBytes() + return w.buf.makeBytes() +} + +// AppendToBytes appends the encoded bytes to dst. +func (w *EncoderBuffer) AppendToBytes(dst []byte) []byte { + size := w.buf.size() + out := append(dst, make([]byte, size)...) + w.buf.copyTo(out[len(dst):]) + return out } // Write appends b directly to the encoder output. diff --git a/rlp/encode.go b/rlp/encode.go index de11410b4f..cb2418b8b3 100644 --- a/rlp/encode.go +++ b/rlp/encode.go @@ -57,20 +57,16 @@ type Encoder interface { // Please see package-level documentation of encoding rules. func Encode(w io.Writer, val interface{}) error { // Optimization: reuse *encBuffer when called by EncodeRLP. - if buf, ok := w.(*encBuffer); ok { + if buf := encBufferFromWriter(w); buf != nil { return buf.encode(val) } - if ebuf, ok := w.(EncoderBuffer); ok { - return ebuf.buf.encode(val) - } buf := getEncBuffer() defer encBufferPool.Put(buf) - if err := buf.encode(val); err != nil { return err } - return buf.toWriter(w) + return buf.writeTo(w) } // EncodeToBytes returns the RLP encoding of val. @@ -82,7 +78,7 @@ func EncodeToBytes(val interface{}) ([]byte, error) { if err := buf.encode(val); err != nil { return nil, err } - return buf.toBytes(), nil + return buf.makeBytes(), nil } // EncodeToReader returns a reader from which the RLP encoding of val diff --git a/rlp/encode_test.go b/rlp/encode_test.go index 58d90c1f12..ce916caf6d 100644 --- a/rlp/encode_test.go +++ b/rlp/encode_test.go @@ -431,6 +431,21 @@ func TestEncodeToBytes(t *testing.T) { runEncTests(t, EncodeToBytes) } +func TestEncodeAppendToBytes(t *testing.T) { + buffer := make([]byte, 20) + runEncTests(t, func(val interface{}) ([]byte, error) { + w := NewEncoderBuffer(nil) + defer w.Flush() + + err := Encode(w, val) + if err != nil { + return nil, err + } + output := w.AppendToBytes(buffer[:0]) + return output, nil + }) +} + func TestEncodeToReader(t *testing.T) { runEncTests(t, func(val interface{}) ([]byte, error) { _, r, err := EncodeToReader(val) diff --git a/tests/block_test.go b/tests/block_test.go index 74c7ed8197..baea3615d9 100644 --- a/tests/block_test.go +++ b/tests/block_test.go @@ -18,6 +18,8 @@ package tests import ( "testing" + + "github.com/ethereum/go-ethereum/core/rawdb" ) func TestBlockchain(t *testing.T) { @@ -47,12 +49,19 @@ func TestBlockchain(t *testing.T) { // using 4.6 TGas bt.skipLoad(`.*randomStatetest94.json.*`) bt.walk(t, blockTestDir, func(t *testing.T, name string, test *BlockTest) { - if err := bt.checkFailure(t, test.Run(false)); err != nil { - t.Errorf("test without snapshotter failed: %v", err) + if err := bt.checkFailure(t, test.Run(false, rawdb.HashScheme)); err != nil { + t.Errorf("test in hash mode without snapshotter failed: %v", err) } - if err := bt.checkFailure(t, test.Run(true)); err != nil { - t.Errorf("test with snapshotter failed: %v", err) + if err := bt.checkFailure(t, test.Run(true, rawdb.HashScheme)); err != nil { + t.Errorf("test in hash mode with snapshotter failed: %v", err) } + // if err := bt.checkFailure(t, test.Run(false, rawdb.PathScheme)); err != nil { + + // t.Errorf("test in path mode without snapshotter failed: %v", err) + // } + // if err := bt.checkFailure(t, test.Run(true, rawdb.PathScheme)); err != nil { + // t.Errorf("test in path mode with snapshotter failed: %v", err) + // } }) // There is also a LegacyTests folder, containing blockchain tests generated // prior to Istanbul. However, they are all derived from GeneralStateTests, diff --git a/tests/block_test_util.go b/tests/block_test_util.go index 2d6abb5eb3..227b9bf605 100644 --- a/tests/block_test_util.go +++ b/tests/block_test_util.go @@ -37,6 +37,9 @@ import ( "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" ) // A BlockTest checks handling of entire blocks. @@ -98,18 +101,26 @@ type btHeaderMarshaling struct { BaseFeePerGas *math.HexOrDecimal256 } -func (t *BlockTest) Run(snapshotter bool) error { +func (t *BlockTest) Run(snapshotter bool, scheme string) error { config, ok := Forks[t.json.Network] if !ok { return UnsupportedForkError{t.json.Network} } // import pre accounts & construct test genesis block & state root - db := rawdb.NewMemoryDatabase() - gblock, err := t.genesis(config).Commit(db) - if err != nil { - return err + var ( + db = rawdb.NewMemoryDatabase() + tconf = &trie.Config{} + ) + if scheme == rawdb.PathScheme { + tconf.PathDB = pathdb.Defaults + } else { + tconf.HashDB = hashdb.Defaults } + triedb := trie.NewDatabase(db, tconf) + // Commit genesis state + gblock := t.genesis(config).MustCommit(db, triedb) + triedb.Close() if gblock.Hash() != t.json.Genesis.Hash { return fmt.Errorf("genesis block hash doesn't match test: computed=%x, test=%x", gblock.Hash().Bytes()[:6], t.json.Genesis.Hash[:6]) } @@ -122,12 +133,15 @@ func (t *BlockTest) Run(snapshotter bool) error { } else { engine = ethash.NewShared() } - cache := &core.CacheConfig{TrieCleanLimit: 0} + cache := &core.CacheConfig{TrieCleanLimit: 0, StateScheme: scheme} if snapshotter { cache.SnapshotLimit = 1 cache.SnapshotWait = true } - chain, err := core.NewBlockChain(db, cache, config, engine, vm.Config{}, nil, nil) + gspec := &core.Genesis{ + Config: config, + } + chain, err := core.NewBlockChain(db, cache, gspec, nil, engine, vm.Config{}, nil, nil) if err != nil { return err } diff --git a/tests/fuzzers/les/les-fuzzer.go b/tests/fuzzers/les/les-fuzzer.go index cb24a2746e..3f48578291 100644 --- a/tests/fuzzers/les/les-fuzzer.go +++ b/tests/fuzzers/les/les-fuzzer.go @@ -62,7 +62,7 @@ func makechain() (bc *core.BlockChain, addrHashes, txHashes []common.Hash) { Alloc: core.GenesisAlloc{bankAddr: {Balance: bankFunds}}, GasLimit: 100000000, } - genesis := gspec.MustCommit(db) + genesis := gspec.MustCommit(db, nil) signer := types.HomesteadSigner{} blocks, _ := core.GenerateChain(gspec.Config, genesis, ethash.NewFaker(), db, testChainLen, func(i int, gen *core.BlockGen) { @@ -82,7 +82,7 @@ func makechain() (bc *core.BlockChain, addrHashes, txHashes []common.Hash) { addrHashes = append(addrHashes, crypto.Keccak256Hash(addr[:])) txHashes = append(txHashes, tx.Hash()) }, true) - bc, _ = core.NewBlockChain(db, nil, gspec.Config, ethash.NewFaker(), vm.Config{}, nil, nil) + bc, _ = core.NewBlockChain(db, nil, &gspec, nil, ethash.NewFaker(), vm.Config{}, nil, nil) if _, err := bc.InsertChain(blocks, nil); err != nil { panic(err) } @@ -90,8 +90,8 @@ func makechain() (bc *core.BlockChain, addrHashes, txHashes []common.Hash) { } func makeTries() (chtTrie *trie.Trie, bloomTrie *trie.Trie, chtKeys, bloomKeys [][]byte) { - chtTrie, _ = trie.New(common.Hash{}, trie.NewDatabase(rawdb.NewMemoryDatabase())) - bloomTrie, _ = trie.New(common.Hash{}, trie.NewDatabase(rawdb.NewMemoryDatabase())) + chtTrie = trie.NewEmpty(trie.NewDatabase(rawdb.NewMemoryDatabase(), nil)) + bloomTrie = trie.NewEmpty(trie.NewDatabase(rawdb.NewMemoryDatabase(), nil)) for i := 0; i < testChainLen; i++ { // The element in CHT is -> key := make([]byte, 8) diff --git a/tests/fuzzers/rangeproof/rangeproof-fuzzer.go b/tests/fuzzers/rangeproof/rangeproof-fuzzer.go index 09ee6bb9c7..aea4a47985 100644 --- a/tests/fuzzers/rangeproof/rangeproof-fuzzer.go +++ b/tests/fuzzers/rangeproof/rangeproof-fuzzer.go @@ -24,6 +24,7 @@ import ( "sort" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/trie" ) @@ -61,8 +62,7 @@ func (f *fuzzer) readInt() uint64 { } func (f *fuzzer) randomTrie(n int) (*trie.Trie, map[string]*kv) { - - trie := new(trie.Trie) + trie := trie.NewEmpty(trie.NewDatabase(rawdb.NewMemoryDatabase(), nil)) vals := make(map[string]*kv) size := f.readInt() // Fill it with some fluff @@ -182,8 +182,10 @@ func (f *fuzzer) fuzz() int { // The function must return // 1 if the fuzzer should increase priority of the -// given input during subsequent fuzzing (for example, the input is lexically -// correct and was parsed successfully); +// +// given input during subsequent fuzzing (for example, the input is lexically +// correct and was parsed successfully); +// // -1 if the input must not be added to corpus even if gives new coverage; and // 0 otherwise; other values are reserved for future use. func Fuzz(input []byte) int { diff --git a/tests/fuzzers/stacktrie/trie_fuzzer.go b/tests/fuzzers/stacktrie/trie_fuzzer.go index 9ed8bcbc51..0e291d7b9d 100644 --- a/tests/fuzzers/stacktrie/trie_fuzzer.go +++ b/tests/fuzzers/stacktrie/trie_fuzzer.go @@ -26,8 +26,12 @@ import ( "sort" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" "golang.org/x/crypto/sha3" ) @@ -116,8 +120,10 @@ func (k kvs) Swap(i, j int) { // The function must return // 1 if the fuzzer should increase priority of the -// given input during subsequent fuzzing (for example, the input is lexically -// correct and was parsed successfully); +// +// given input during subsequent fuzzing (for example, the input is lexically +// correct and was parsed successfully); +// // -1 if the input must not be added to corpus even if gives new coverage; and // 0 otherwise // other values are reserved for future use. @@ -142,11 +148,15 @@ func (f *fuzzer) fuzz() int { // This spongeDb is used to check the sequence of disk-db-writes var ( - spongeA = &spongeDb{sponge: sha3.NewLegacyKeccak256()} - dbA = trie.NewDatabase(spongeA) - trieA, _ = trie.New(common.Hash{}, dbA) - spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()} - trieB = trie.NewStackTrie(spongeB) + spongeA = &spongeDb{sponge: sha3.NewLegacyKeccak256()} + dbA = trie.NewDatabase(rawdb.NewDatabase(spongeA), nil) + trieA = trie.NewEmpty(dbA) + spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()} + dbB = trie.NewDatabase(rawdb.NewDatabase(spongeB), nil) + options = trie.NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(spongeB, common.Hash{}, path, hash, blob, dbB.Scheme()) + }) + trieB = trie.NewStackTrie(options) vals kvs useful bool maxElements = 10000 @@ -175,12 +185,15 @@ func (f *fuzzer) fuzz() int { return 0 } // Flush trie -> database - rootA, _, err := trieA.Commit(nil) + rootA, nodes, err := trieA.Commit(false) if err != nil { panic(err) } + if nodes != nil { + dbA.Update(rootA, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + } // Flush memdb -> disk (sponge) - dbA.Commit(rootA, false, nil) + dbA.Commit(rootA, false) // Stacktrie requires sorted insertion sort.Sort(vals) @@ -191,9 +204,7 @@ func (f *fuzzer) fuzz() int { trieB.Update(kv.k, kv.v) } rootB := trieB.Hash() - if _, err := trieB.Commit(); err != nil { - panic(err) - } + trieB.Commit() if rootA != rootB { panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB)) } @@ -202,5 +213,47 @@ func (f *fuzzer) fuzz() int { if !bytes.Equal(sumA, sumB) { panic(fmt.Sprintf("sequence differ: (trie) %x != %x (stacktrie)", sumA, sumB)) } + // Ensure all the nodes are persisted correctly + // Need tracked deleted nodes. + var ( + nodeset = make(map[string][]byte) // path -> blob + optionsC = trie.NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + if crypto.Keccak256Hash(blob) != hash { + panic("invalid node blob") + } + nodeset[string(path)] = common.CopyBytes(blob) + }) + trieC = trie.NewStackTrie(optionsC) + checked int + ) + for _, kv := range vals { + trieC.Update(kv.k, kv.v) + } + rootC := trieC.Commit() + if rootA != rootC { + panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootC)) + } + trieA, _ = trie.New(trie.TrieID(rootA), dbA) + iterA := trieA.MustNodeIterator(nil) + for iterA.Next(true) { + if iterA.Hash() == (common.Hash{}) { + if _, present := nodeset[string(iterA.Path())]; present { + panic("unexpected tiny node") + } + continue + } + nodeBlob, present := nodeset[string(iterA.Path())] + if !present { + panic("missing node") + } + if !bytes.Equal(nodeBlob, iterA.NodeBlob()) { + panic("node blob is not matched") + } + checked += 1 + } + if checked != len(nodeset) { + panic("node number is not matched") + } + return 1 } diff --git a/tests/fuzzers/trie/trie-fuzzer.go b/tests/fuzzers/trie/trie-fuzzer.go index e993af47cf..617d4f7c30 100644 --- a/tests/fuzzers/trie/trie-fuzzer.go +++ b/tests/fuzzers/trie/trie-fuzzer.go @@ -21,9 +21,10 @@ import ( "encoding/binary" "fmt" - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) // randTest performs random trie operations. @@ -124,8 +125,10 @@ func Generate(input []byte) randTest { // The function must return // 1 if the fuzzer should increase priority of the -// given input during subsequent fuzzing (for example, the input is lexically -// correct and was parsed successfully); +// +// given input during subsequent fuzzing (for example, the input is lexically +// correct and was parsed successfully); +// // -1 if the input must not be added to corpus even if gives new coverage; and // 0 otherwise // other values are reserved for future use. @@ -141,11 +144,12 @@ func Fuzz(input []byte) int { } func runRandTest(rt randTest) error { - - triedb := trie.NewDatabase(memorydb.New()) - - tr, _ := trie.New(common.Hash{}, triedb) - values := make(map[string]string) // tracks content of the trie + var ( + triedb = trie.NewDatabase(rawdb.NewMemoryDatabase(), nil) + tr = trie.NewEmpty(triedb) + origin = types.EmptyRootHash + values = make(map[string]string) // tracks content of the trie + ) for i, step := range rt { switch step.op { @@ -161,23 +165,27 @@ func runRandTest(rt randTest) error { if string(v) != want { rt[i].err = fmt.Errorf("mismatch for key 0x%x, got 0x%x want 0x%x", step.key, v, want) } - case opCommit: - _, _, rt[i].err = tr.Commit(nil) case opHash: tr.Hash() - case opReset: - hash, _, err := tr.Commit(nil) + case opCommit: + hash, nodes, err := tr.Commit(false) if err != nil { return err } - newtr, err := trie.New(hash, triedb) + if nodes != nil { + if err := triedb.Update(hash, origin, 0, trienode.NewWithNodeSet(nodes), nil); err != nil { + return err + } + } + newtr, err := trie.New(trie.TrieID(hash), triedb) if err != nil { return err } tr = newtr + origin = hash case opItercheckhash: - checktr, _ := trie.New(common.Hash{}, triedb) - it := trie.NewIterator(tr.NodeIterator(nil)) + checktr := trie.NewEmpty(triedb) + it := trie.NewIterator(tr.MustNodeIterator(nil)) for it.Next() { checktr.Update(it.Key, it.Value) } diff --git a/tests/state_test.go b/tests/state_test.go index 831542e720..6eb65350a4 100644 --- a/tests/state_test.go +++ b/tests/state_test.go @@ -20,10 +20,14 @@ import ( "bufio" "bytes" "fmt" - "github.com/ethereum/go-ethereum/eth/tracers/logger" "reflect" "testing" + "github.com/ethereum/go-ethereum/eth/tracers/logger" + + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/state" + "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/vm" ) @@ -67,29 +71,52 @@ func TestState(t *testing.T) { subtest := subtest key := fmt.Sprintf("%s/%d", subtest.Fork, subtest.Index) - t.Run(key+"/trie", func(t *testing.T) { + t.Run(key+"/hash/trie", func(t *testing.T) { + withTrace(t, test.gasLimit(subtest), func(vmconfig vm.Config) error { + var result error + test.Run(subtest, vmconfig, false, rawdb.HashScheme, func(err error, snaps *snapshot.Tree, state *state.StateDB) { + result = st.checkFailure(t, err) + }) + return result + }) + }) + t.Run(key+"/hash/snap", func(t *testing.T) { + withTrace(t, test.gasLimit(subtest), func(vmconfig vm.Config) error { + var result error + test.Run(subtest, vmconfig, true, rawdb.HashScheme, func(err error, snaps *snapshot.Tree, state *state.StateDB) { + if snaps != nil && state != nil { + if _, err := snaps.Journal(state.IntermediateRoot(false)); err != nil { + result = err + return + } + } + result = st.checkFailure(t, err) + }) + return result + }) + }) + t.Run(key+"/path/trie", func(t *testing.T) { withTrace(t, test.gasLimit(subtest), func(vmconfig vm.Config) error { - _, _, err := test.Run(subtest, vmconfig, false) - if err != nil && len(test.json.Post[subtest.Fork][subtest.Index].ExpectException) > 0 { - // Ignore expected errors (TODO MariusVanDerWijden check error string) - return nil - } - return st.checkFailure(t, err) + var result error + test.Run(subtest, vmconfig, false, rawdb.PathScheme, func(err error, snaps *snapshot.Tree, state *state.StateDB) { + result = st.checkFailure(t, err) + }) + return result }) }) - t.Run(key+"/snap", func(t *testing.T) { + t.Run(key+"/path/snap", func(t *testing.T) { withTrace(t, test.gasLimit(subtest), func(vmconfig vm.Config) error { - snaps, statedb, err := test.Run(subtest, vmconfig, true) - if snaps != nil && statedb != nil { - if _, err := snaps.Journal(statedb.IntermediateRoot(false)); err != nil { - return err + var result error + test.Run(subtest, vmconfig, true, rawdb.PathScheme, func(err error, snaps *snapshot.Tree, state *state.StateDB) { + if snaps != nil && state != nil { + if _, err := snaps.Journal(state.IntermediateRoot(false)); err != nil { + result = err + return + } } - } - if err != nil && len(test.json.Post[subtest.Fork][subtest.Index].ExpectException) > 0 { - // Ignore expected errors (TODO MariusVanDerWijden check error string) - return nil - } - return st.checkFailure(t, err) + result = st.checkFailure(t, err) + }) + return result }) }) } diff --git a/tests/state_test_util.go b/tests/state_test_util.go index 484d688d38..e3f576718b 100644 --- a/tests/state_test_util.go +++ b/tests/state_test_util.go @@ -37,6 +37,9 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" "golang.org/x/crypto/sha3" ) @@ -158,32 +161,41 @@ func (t *StateTest) Subtests() []StateSubtest { } // Run executes a specific subtest and verifies the post-state and logs -func (t *StateTest) Run(subtest StateSubtest, vmconfig vm.Config, snapshotter bool) (*snapshot.Tree, *state.StateDB, error) { - snaps, statedb, root, err := t.RunNoVerify(subtest, vmconfig, snapshotter) +func (t *StateTest) Run(subtest StateSubtest, vmconfig vm.Config, snapshotter bool, scheme string, postCheck func(err error, snaps *snapshot.Tree, state *state.StateDB)) (result error) { + triedb, snaps, statedb, root, err := t.RunNoVerify(subtest, vmconfig, snapshotter, scheme) if err != nil { - return snaps, statedb, err + return err } + // Invoke the callback at the end of function for further analysis. + defer func() { + postCheck(result, snaps, statedb) + + if triedb != nil { + triedb.Close() + } + }() post := t.json.Post[subtest.Fork][subtest.Index] // N.B: We need to do this in a two-step process, because the first Commit takes care // of suicides, and we need to touch the coinbase _after_ it has potentially suicided. if root != common.Hash(post.Root) { - return snaps, statedb, fmt.Errorf("post state root mismatch: got %x, want %x", root, post.Root) + return fmt.Errorf("post state root mismatch: got %x, want %x", root, post.Root) } if logs := rlpHash(statedb.Logs()); logs != common.Hash(post.Logs) { - return snaps, statedb, fmt.Errorf("post state logs hash mismatch: got %x, want %x", logs, post.Logs) + return fmt.Errorf("post state logs hash mismatch: got %x, want %x", logs, post.Logs) } - return snaps, statedb, nil + return nil } // RunNoVerify runs a specific subtest and returns the statedb and post-state root -func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapshotter bool) (*snapshot.Tree, *state.StateDB, common.Hash, error) { +func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapshotter bool, scheme string) (*trie.Database, *snapshot.Tree, *state.StateDB, common.Hash, error) { config, eips, err := GetChainConfig(subtest.Fork) if err != nil { - return nil, nil, common.Hash{}, UnsupportedForkError{subtest.Fork} + return nil, nil, nil, common.Hash{}, UnsupportedForkError{subtest.Fork} } vmconfig.ExtraEips = eips - block := t.genesis(config).ToBlock(nil) - snaps, statedb := MakePreState(rawdb.NewMemoryDatabase(), t.json.Pre, snapshotter) + block := t.genesis(config).ToBlock() + triedb, snaps, statedb := MakePreState(rawdb.NewMemoryDatabase(), t.json.Pre, snapshotter, scheme) + defer triedb.Close() var baseFee *big.Int if config.IsLondon(new(big.Int)) { @@ -197,7 +209,7 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh post := t.json.Post[subtest.Fork][subtest.Index] msg, err := t.json.Tx.toMessage(post, baseFee) if err != nil { - return nil, nil, common.Hash{}, err + return nil, nil, nil, common.Hash{}, err } // Try to recover tx with current signer @@ -205,11 +217,11 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh var ttx types.Transaction err := ttx.UnmarshalBinary(post.TxBytes) if err != nil { - return nil, nil, common.Hash{}, err + return nil, nil, nil, common.Hash{}, err } if _, err := types.Sender(types.LatestSigner(config), &ttx); err != nil { - return nil, nil, common.Hash{}, err + return nil, nil, nil, common.Hash{}, err } } @@ -237,18 +249,25 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh // the coinbase gets no txfee, so isn't created, and thus needs to be touched statedb.AddBalance(block.Coinbase(), new(big.Int)) // Commit block - statedb.Commit(config.IsEIP158(block.Number())) + statedb.Commit(block.NumberU64(), config.IsEIP158(block.Number())) // And _now_ get the state root root := statedb.IntermediateRoot(config.IsEIP158(block.Number())) - return snaps, statedb, root, err + return triedb, snaps, statedb, root, err } func (t *StateTest) gasLimit(subtest StateSubtest) uint64 { return t.json.Tx.GasLimit[t.json.Post[subtest.Fork][subtest.Index].Indexes.Gas] } -func MakePreState(db ethdb.Database, accounts core.GenesisAlloc, snapshotter bool) (*snapshot.Tree, *state.StateDB) { - sdb := state.NewDatabase(db) +func MakePreState(db ethdb.Database, accounts core.GenesisAlloc, snapshotter bool, scheme string) (*trie.Database, *snapshot.Tree, *state.StateDB) { + tconf := &trie.Config{Preimages: true} + if scheme == rawdb.HashScheme { + tconf.HashDB = hashdb.Defaults + } else { + tconf.PathDB = pathdb.Defaults + } + triedb := trie.NewDatabase(db, tconf) + sdb := state.NewDatabaseWithNodeDB(db, triedb) statedb, _ := state.New(common.Hash{}, sdb, nil) for addr, a := range accounts { statedb.SetCode(addr, a.Code) @@ -259,14 +278,14 @@ func MakePreState(db ethdb.Database, accounts core.GenesisAlloc, snapshotter boo } } // Commit and re-open to start with a clean state. - root, _ := statedb.Commit(false) + root, _ := statedb.Commit(0, false) var snaps *snapshot.Tree if snapshotter { - snaps, _ = snapshot.New(db, sdb.TrieDB(), 1, root, false, true, false) + snaps, _ = snapshot.New(db, triedb, 1, root, false, true, false) } statedb, _ = state.New(root, sdb, snaps) - return snaps, statedb + return triedb, snaps, statedb } func (t *StateTest) genesis(config *params.ChainConfig) *core.Genesis { diff --git a/trie/committer.go b/trie/committer.go index 0721990a21..9d689e8e35 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -17,12 +17,13 @@ package trie import ( - "errors" "fmt" "sync" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" "golang.org/x/crypto/sha3" ) @@ -30,68 +31,53 @@ import ( // some parallelism but not incur too much memory overhead. const leafChanSize = 200 -// leaf represents a trie leaf value -type leaf struct { - size int // size of the rlp data (estimate) - hash common.Hash // hash of rlp data - node node // the node to commit -} - -// committer is a type used for the trie Commit operation. A committer has some -// internal preallocated temp space, and also a callback that is invoked when -// leaves are committed. The leafs are passed through the `leafCh`, to allow -// some level of parallelism. -// By 'some level' of parallelism, it's still the case that all leaves will be -// processed sequentially - onleaf will never be called in parallel or out of order. +// committer is a type used for the trie Commit operation. The committer will +// capture all dirty nodes during the commit process and keep them cached in +// insertion order. type committer struct { - tmp sliceBuffer sha crypto.KeccakState - onleaf LeafCallback - leafCh chan *leaf + owner common.Hash // TODO: same as nodes.owner, consider removing + nodes *trienode.NodeSet + tracer *tracer + collectLeaf bool } // committers live in a global sync.Pool var committerPool = sync.Pool{ New: func() interface{} { return &committer{ - tmp: make(sliceBuffer, 0, 550), // cap is as large as a full fullNode. sha: sha3.NewLegacyKeccak256().(crypto.KeccakState), } }, } // newCommitter creates a new committer or picks one from the pool. -func newCommitter() *committer { - return committerPool.Get().(*committer) -} - -func returnCommitterToPool(h *committer) { - h.onleaf = nil - h.leafCh = nil - committerPool.Put(h) +func newCommitter(nodes *trienode.NodeSet, tracer *tracer, collectLeaf bool) *committer { + return &committer{ + nodes: nodes, + tracer: tracer, + collectLeaf: collectLeaf, + } } // Commit collapses a node down into a hash node and inserts it into the database -func (c *committer) Commit(n node, db *Database) (hashNode, int, error) { - if db == nil { - return nil, 0, errors.New("no db provided") - } - h, committed, err := c.commit(n, db) +func (c *committer) Commit(n node) (hashNode, *trienode.NodeSet, error) { + h, err := c.commit(nil, n) if err != nil { - return nil, 0, err + return nil, nil, err } - return h.(hashNode), committed, nil + return h.(hashNode), c.nodes, nil } // commit collapses a node down into a hash node and inserts it into the database -func (c *committer) commit(n node, db *Database) (node, int, error) { +func (c *committer) commit(path []byte, n node) (node, error) { // if this path is clean, use available cached data hash, dirty := n.cache() if hash != nil && !dirty { - return hash, 0, nil + return hash, nil } - // Commit children, then parent, and remove remove the dirty flag. + // Commit children, then parent, and remove the dirty flag. switch cn := n.(type) { case *shortNode: // Commit child @@ -99,36 +85,35 @@ func (c *committer) commit(n node, db *Database) (node, int, error) { // If the child is fullNode, recursively commit, // otherwise it can only be hashNode or valueNode. - var childCommitted int if _, ok := cn.Val.(*fullNode); ok { - childV, committed, err := c.commit(cn.Val, db) + childV, err := c.commit(append(path, cn.Key...), cn.Val) if err != nil { - return nil, 0, err + return nil, err } - collapsed.Val, childCommitted = childV, committed + collapsed.Val = childV } // The key needs to be copied, since we're delivering it to database collapsed.Key = hexToCompact(cn.Key) - hashedNode := c.store(collapsed, db) + hashedNode := c.store(path, collapsed) if hn, ok := hashedNode.(hashNode); ok { - return hn, childCommitted + 1, nil + return hn, nil } - return collapsed, childCommitted, nil + return collapsed, nil case *fullNode: - hashedKids, childCommitted, err := c.commitChildren(cn, db) + hashedKids, err := c.commitChildren(path, cn) if err != nil { - return nil, 0, err + return nil, err } collapsed := cn.copy() collapsed.Children = hashedKids - hashedNode := c.store(collapsed, db) + hashedNode := c.store(path, collapsed) if hn, ok := hashedNode.(hashNode); ok { - return hn, childCommitted + 1, nil + return hn, nil } - return collapsed, childCommitted, nil + return collapsed, nil case hashNode: - return cn, 0, nil + return cn, nil default: // nil, valuenode shouldn't be committed panic(fmt.Sprintf("%T: invalid node: %v", n, n)) @@ -136,11 +121,8 @@ func (c *committer) commit(n node, db *Database) (node, int, error) { } // commitChildren commits the children of the given fullnode -func (c *committer) commitChildren(n *fullNode, db *Database) ([17]node, int, error) { - var ( - committed int - children [17]node - ) +func (c *committer) commitChildren(path []byte, n *fullNode) ([17]node, error) { + var children [17]node for i := 0; i < 16; i++ { child := n.Children[i] if child == nil { @@ -156,121 +138,89 @@ func (c *committer) commitChildren(n *fullNode, db *Database) ([17]node, int, er // Commit the child recursively and store the "hashed" value. // Note the returned node can be some embedded nodes, so it's // possible the type is not hashNode. - hashed, childCommitted, err := c.commit(child, db) + hashed, err := c.commit(append(path, byte(i)), child) if err != nil { - return children, 0, err + return children, err } children[i] = hashed - committed += childCommitted } // For the 17th child, it's possible the type is valuenode. if n.Children[16] != nil { children[16] = n.Children[16] } - return children, committed, nil + return children, nil } // store hashes the node n and if we have a storage layer specified, it writes // the key/value pair to it and tracks any node->child references as well as any // node->external trie references. -func (c *committer) store(n node, db *Database) node { +func (c *committer) store(path []byte, n node) node { // Larger nodes are replaced by their hash and stored in the database. - var ( - hash, _ = n.cache() - size int - ) + hash, _ := n.cache() + + // This was not generated - must be a small node stored in the parent. + // In theory, we should check if the node is leaf here (embedded node + // usually is leaf node). But small value(less than 32bytes) is not + // our target(leaves in account trie only). if hash == nil { - // This was not generated - must be a small node stored in the parent. - // In theory, we should apply the leafCall here if it's not nil(embedded - // node usually contains value). But small value(less than 32bytes) is - // not our target. - return n - } else { - // We have the hash already, estimate the RLP encoding-size of the node. - // The size is used for mem tracking, does not need to be exact - size = estimateSize(n) - } - // If we're using channel-based leaf-reporting, send to channel. - // The leaf channel will be active only when there an active leaf-callback - if c.leafCh != nil { - c.leafCh <- &leaf{ - size: size, - hash: common.BytesToHash(hash), - node: n, + // The node is embedded in its parent, in other words, this node + // will not be stored in the database independently, mark it as + // deleted only if the node was existent in database before. + _, ok := c.tracer.accessList[string(path)] + if ok { + c.nodes.AddNode(path, trienode.NewDeleted()) } - } else if db != nil { - // No leaf-callback used, but there's still a database. Do serial - // insertion - db.lock.Lock() - db.insert(common.BytesToHash(hash), size, n) - db.lock.Unlock() + return n } - return hash -} - -// commitLoop does the actual insert + leaf callback for nodes. -func (c *committer) commitLoop(db *Database) { - for item := range c.leafCh { - var ( - hash = item.hash - size = item.size - n = item.node + // We have the hash already, estimate the RLP encoding-size of the node. + // The size is used for mem tracking, does not need to be exact + var ( + nhash = common.BytesToHash(hash) + blob, _ = rlp.EncodeToBytes(n) + node = trienode.New( + nhash, + blob, ) - // We are pooling the trie nodes into an intermediate memory cache - db.lock.Lock() - db.insert(hash, size, n) - db.lock.Unlock() + ) - if c.onleaf != nil { - switch n := n.(type) { - case *shortNode: - if child, ok := n.Val.(valueNode); ok { - c.onleaf(nil, nil, child, hash) - } - case *fullNode: - // For children in range [0, 15], it's impossible - // to contain valueNode. Only check the 17th child. - if n.Children[16] != nil { - c.onleaf(nil, nil, n.Children[16].(valueNode), hash) - } + // Collect the dirty node to nodeset for return. + c.nodes.AddNode(path, node) + // Collect the corresponding leaf node if it's required. We don't check + // full node since it's impossible to store value in fullNode. The key + // length of leaves should be exactly same. + if c.collectLeaf { + if sn, ok := n.(*shortNode); ok { + if val, ok := sn.Val.(valueNode); ok { + c.nodes.AddLeaf(nhash, val) } } } + return hash } -func (c *committer) makeHashNode(data []byte) hashNode { - n := make(hashNode, c.sha.Size()) - c.sha.Reset() - c.sha.Write(data) - c.sha.Read(n) - return n +// mptResolver the children resolver in merkle-patricia-tree. +type mptResolver struct{} + +// ForEach implements childResolver, decodes the provided node and +// traverses the children inside. +func (resolver mptResolver) ForEach(node []byte, onChild func(common.Hash)) { + forGatherChildren(mustDecodeNode(nil, node), onChild) } -// estimateSize estimates the size of an rlp-encoded node, without actually -// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie -// with 1000 leafs, the only errors above 1% are on small shortnodes, where this -// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35) -func estimateSize(n node) int { +// forGatherChildren traverses the node hierarchy and invokes the callback +// for all the hashnode children. +func forGatherChildren(n node, onChild func(hash common.Hash)) { switch n := n.(type) { case *shortNode: - // A short node contains a compacted key, and a value. - return 3 + len(n.Key) + estimateSize(n.Val) + forGatherChildren(n.Val, onChild) case *fullNode: - // A full node contains up to 16 hashes (some nils), and a key - s := 3 for i := 0; i < 16; i++ { - if child := n.Children[i]; child != nil { - s += estimateSize(child) - } else { - s++ - } + forGatherChildren(n.Children[i], onChild) } - return s - case valueNode: - return 1 + len(n) case hashNode: - return 1 + len(n) + onChild(common.BytesToHash(n)) + case valueNode, nil: default: - panic(fmt.Sprintf("node type %T", n)) + panic(fmt.Sprintf("unknown node type: %T", n)) } } diff --git a/trie/database.go b/trie/database.go index 58ca4e6f3c..42eedc2001 100644 --- a/trie/database.go +++ b/trie/database.go @@ -1,4 +1,4 @@ -// Copyright 2018 The go-ethereum Authors +// Copyright 2022 The go-ethereum Authors // This file is part of the go-ethereum library. // // The go-ethereum library is free software: you can redistribute it and/or modify @@ -18,872 +18,310 @@ package trie import ( "errors" - "fmt" - "io" - "reflect" - "runtime" - "sync" - "time" - - "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" - "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" ) -var ( - memcacheCleanHitMeter = metrics.NewRegisteredMeter("trie/memcache/clean/hit", nil) - memcacheCleanMissMeter = metrics.NewRegisteredMeter("trie/memcache/clean/miss", nil) - memcacheCleanReadMeter = metrics.NewRegisteredMeter("trie/memcache/clean/read", nil) - memcacheCleanWriteMeter = metrics.NewRegisteredMeter("trie/memcache/clean/write", nil) - - memcacheDirtyHitMeter = metrics.NewRegisteredMeter("trie/memcache/dirty/hit", nil) - memcacheDirtyMissMeter = metrics.NewRegisteredMeter("trie/memcache/dirty/miss", nil) - memcacheDirtyReadMeter = metrics.NewRegisteredMeter("trie/memcache/dirty/read", nil) - memcacheDirtyWriteMeter = metrics.NewRegisteredMeter("trie/memcache/dirty/write", nil) - - memcacheFlushTimeTimer = metrics.NewRegisteredResettingTimer("trie/memcache/flush/time", nil) - memcacheFlushNodesMeter = metrics.NewRegisteredMeter("trie/memcache/flush/nodes", nil) - memcacheFlushSizeMeter = metrics.NewRegisteredMeter("trie/memcache/flush/size", nil) - - memcacheGCTimeTimer = metrics.NewRegisteredResettingTimer("trie/memcache/gc/time", nil) - memcacheGCNodesMeter = metrics.NewRegisteredMeter("trie/memcache/gc/nodes", nil) - memcacheGCSizeMeter = metrics.NewRegisteredMeter("trie/memcache/gc/size", nil) - - memcacheCommitTimeTimer = metrics.NewRegisteredResettingTimer("trie/memcache/commit/time", nil) - memcacheCommitNodesMeter = metrics.NewRegisteredMeter("trie/memcache/commit/nodes", nil) - memcacheCommitSizeMeter = metrics.NewRegisteredMeter("trie/memcache/commit/size", nil) -) - -// Database is an intermediate write layer between the trie data structures and -// the disk database. The aim is to accumulate trie writes in-memory and only -// periodically flush a couple tries to disk, garbage collecting the remainder. -// -// Note, the trie Database is **not** thread safe in its mutations, but it **is** -// thread safe in providing individual, independent node access. The rationale -// behind this split design is to provide read access to RPC handlers and sync -// servers even while the trie is executing expensive garbage collection. -type Database struct { - diskdb ethdb.KeyValueStore // Persistent storage for matured trie nodes - - cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs - dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes - oldest common.Hash // Oldest tracked node, flush-list head - newest common.Hash // Newest tracked node, flush-list tail - - preimages map[common.Hash][]byte // Preimages of nodes from the secure trie - - gctime time.Duration // Time spent on garbage collection since last commit - gcnodes uint64 // Nodes garbage collected since last commit - gcsize common.StorageSize // Data storage garbage collected since last commit - - flushtime time.Duration // Time spent on data flushing since last commit - flushnodes uint64 // Nodes flushed since last commit - flushsize common.StorageSize // Data storage flushed since last commit - - dirtiesSize common.StorageSize // Storage size of the dirty node cache (exc. metadata) - childrenSize common.StorageSize // Storage size of the external children tracking - preimagesSize common.StorageSize // Storage size of the preimages cache - - lock sync.RWMutex -} - -// rawNode is a simple binary blob used to differentiate between collapsed trie -// nodes and already encoded RLP binary blobs (while at the same time store them -// in the same cache fields). -type rawNode []byte - -func (n rawNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } -func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") } - -func (n rawNode) EncodeRLP(w io.Writer) error { - _, err := w.Write(n) - return err -} - -// rawFullNode represents only the useful data content of a full node, with the -// caches and flags stripped out to minimize its data storage. This type honors -// the same RLP encoding as the original parent. -type rawFullNode [17]node - -func (n rawFullNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } -func (n rawFullNode) fstring(ind string) string { panic("this should never end up in a live trie") } +// Config defines all necessary options for database. +type Config struct { + Preimages bool // Flag whether the preimage of trie key is recorded -func (n rawFullNode) EncodeRLP(w io.Writer) error { - var nodes [17]node + HashDB *hashdb.Config // Configs for hash-based scheme + PathDB *pathdb.Config // Configs for experimental path-based scheme - for i, child := range n { - if child != nil { - nodes[i] = child - } else { - nodes[i] = nilValueNode - } - } - return rlp.Encode(w, nodes) + // Testing hooks + OnCommit func(states *triestate.Set) // Hook invoked when commit is performed } -// rawShortNode represents only the useful data content of a short node, with the -// caches and flags stripped out to minimize its data storage. This type honors -// the same RLP encoding as the original parent. -type rawShortNode struct { - Key []byte - Val node -} +// backend defines the methods needed to access/update trie nodes in different +// state scheme. +type backend interface { + // Scheme returns the identifier of used storage scheme. + Scheme() string -func (n rawShortNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } -func (n rawShortNode) fstring(ind string) string { panic("this should never end up in a live trie") } + // Initialized returns an indicator if the state data is already initialized + // according to the state scheme. + Initialized(genesisRoot common.Hash) bool -// cachedNode is all the information we know about a single cached trie node -// in the memory database write layer. -type cachedNode struct { - node node // Cached collapsed trie node, or raw rlp data - size uint16 // Byte size of the useful cached data + // Size returns the current storage size of the memory cache in front of the + // persistent database layer. + Size() common.StorageSize - parents uint32 // Number of live nodes referencing this one - children map[common.Hash]uint16 // External children referenced by this node + // Update performs a state transition by committing dirty nodes contained + // in the given set in order to update state from the specified parent to + // the specified root. + // The passed in maps(nodes, states) will be retained to avoid copying + // everything. Therefore, these maps must not be changed afterwards. + Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error - flushPrev common.Hash // Previous node in the flush-list - flushNext common.Hash // Next node in the flush-list -} + // DiskDB retrieves the persistent storage backing the trie database. + DiskDB() ethdb.KeyValueStore -// cachedNodeSize is the raw size of a cachedNode data structure without any -// node data included. It's an approximate size, but should be a lot better -// than not counting them. -var cachedNodeSize = int(reflect.TypeOf(cachedNode{}).Size()) + // Commit writes all relevant trie nodes belonging to the specified state + // to disk. Report specifies whether logs will be displayed in info level. + Commit(root common.Hash, report bool) error -// cachedNodeChildrenSize is the raw size of an initialized but empty external -// reference map. -const cachedNodeChildrenSize = 48 - -// rlp returns the raw rlp encoded blob of the cached trie node, either directly -// from the cache, or by regenerating it from the collapsed node. -func (n *cachedNode) rlp() []byte { - if node, ok := n.node.(rawNode); ok { - return node - } - blob, err := rlp.EncodeToBytes(n.node) - if err != nil { - panic(err) - } - return blob + // Close closes the trie database backend and releases all held resources. + Close() error } -// obj returns the decoded and expanded trie node, either directly from the cache, -// or by regenerating it from the rlp encoded blob. -func (n *cachedNode) obj(hash common.Hash) node { - if node, ok := n.node.(rawNode); ok { - return mustDecodeNode(hash[:], node) - } - return expandNode(hash[:], n.node) +// Database is the wrapper of the underlying backend which is shared by different +// types of node backend as an entrypoint. It's responsible for all interactions +// relevant with trie nodes and node preimages. +type Database struct { + config *Config // Configuration for trie database + diskdb ethdb.Database // Persistent database to store the snapshot + preimages *preimageStore // The store for caching preimages + backend backend // The backend for managing trie nodes } -// forChilds invokes the callback for all the tracked children of this node, -// both the implicit ones from inside the node as well as the explicit ones -// from outside the node. -func (n *cachedNode) forChilds(onChild func(hash common.Hash)) { - for child := range n.children { - onChild(child) - } - if _, ok := n.node.(rawNode); !ok { - forGatherChildren(n.node, onChild) +// prepare initializes the database with provided configs, but the +// database backend is still left as nil. +func prepare(diskdb ethdb.Database, config *Config) *Database { + var preimages *preimageStore + if config != nil && config.Preimages { + preimages = newPreimageStore(diskdb) } -} - -// forGatherChildren traverses the node hierarchy of a collapsed storage node and -// invokes the callback for all the hashnode children. -func forGatherChildren(n node, onChild func(hash common.Hash)) { - switch n := n.(type) { - case *rawShortNode: - forGatherChildren(n.Val, onChild) - case rawFullNode: - for i := 0; i < 16; i++ { - forGatherChildren(n[i], onChild) - } - case hashNode: - onChild(common.BytesToHash(n)) - case valueNode, nil, rawNode: - default: - panic(fmt.Sprintf("unknown node type: %T", n)) + return &Database{ + config: config, + diskdb: diskdb, + preimages: preimages, } } -// simplifyNode traverses the hierarchy of an expanded memory node and discards -// all the internal caches, returning a node that only contains the raw data. -func simplifyNode(n node) node { - switch n := n.(type) { - case *shortNode: - // Short nodes discard the flags and cascade - return &rawShortNode{Key: n.Key, Val: simplifyNode(n.Val)} - - case *fullNode: - // Full nodes discard the flags and cascade - node := rawFullNode(n.Children) - for i := 0; i < len(node); i++ { - if node[i] != nil { - node[i] = simplifyNode(node[i]) - } - } - return node - - case valueNode, hashNode, rawNode: - return n - - default: - panic(fmt.Sprintf("unknown node type: %T", n)) - } +// HashDefaults represents a config for using hash-based scheme with +// default settings. +var HashDefaults = &Config{ + Preimages: false, + HashDB: hashdb.Defaults, } -// expandNode traverses the node hierarchy of a collapsed storage node and converts -// all fields and keys into expanded memory form. -func expandNode(hash hashNode, n node) node { - switch n := n.(type) { - case *rawShortNode: - // Short nodes need key and child expansion - return &shortNode{ - Key: compactToHex(n.Key), - Val: expandNode(nil, n.Val), - flags: nodeFlag{ - hash: hash, - }, - } - - case rawFullNode: - // Full nodes need child expansion - node := &fullNode{ - flags: nodeFlag{ - hash: hash, - }, - } - for i := 0; i < len(node.Children); i++ { - if n[i] != nil { - node.Children[i] = expandNode(nil, n[i]) - } - } - return node - - case valueNode, hashNode: - return n - - default: - panic(fmt.Sprintf("unknown node type: %T", n)) +// NewDatabase initializes the trie database with default settings, namely +// the legacy hash-based scheme is used by default. +func NewDatabase(diskdb ethdb.Database, config *Config) *Database { + if config == nil { + config = HashDefaults } -} - -// Config defines all necessary options for database. -type Config struct { - Cache int // Memory allowance (MB) to use for caching trie nodes in memory - Journal string // Journal of clean cache to survive node restarts - Preimages bool // Flag whether the preimage of trie key is recorded -} - -// NewDatabase creates a new trie database to store ephemeral trie content before -// its written out to disk or garbage collected. No read cache is created, so all -// data retrievals will hit the underlying disk database. -func NewDatabase(diskdb ethdb.KeyValueStore) *Database { - return NewDatabaseWithConfig(diskdb, nil) -} - -// NewDatabaseWithConfig creates a new trie database to store ephemeral trie content -// before its written out to disk or garbage collected. It also acts as a read cache -// for nodes loaded from disk. -func NewDatabaseWithConfig(diskdb ethdb.KeyValueStore, config *Config) *Database { - var cleans *fastcache.Cache - if config != nil && config.Cache > 0 { - if config.Journal == "" { - cleans = fastcache.New(config.Cache * 1024 * 1024) - } else { - cleans = fastcache.LoadFromFileOrNew(config.Journal, config.Cache*1024*1024) - } + var preimages *preimageStore + if config.Preimages { + preimages = newPreimageStore(diskdb) } db := &Database{ - diskdb: diskdb, - cleans: cleans, - dirties: map[common.Hash]*cachedNode{{}: { - children: make(map[common.Hash]uint16), - }}, + config: config, + diskdb: diskdb, + preimages: preimages, } - if config == nil || config.Preimages { // TODO(karalabe): Flip to default off in the future - db.preimages = make(map[common.Hash][]byte) + if config.HashDB != nil && config.PathDB != nil { + log.Crit("Both 'hash' and 'path' mode are configured") } - return db -} - -// DiskDB retrieves the persistent storage backing the trie database. -func (db *Database) DiskDB() ethdb.KeyValueStore { - return db.diskdb -} - -// insert inserts a collapsed trie node into the memory database. -// The blob size must be specified to allow proper size tracking. -// All nodes inserted by this function will be reference tracked -// and in theory should only used for **trie nodes** insertion. -func (db *Database) insert(hash common.Hash, size int, node node) { - // If the node's already cached, skip - if _, ok := db.dirties[hash]; ok { - return - } - memcacheDirtyWriteMeter.Mark(int64(size)) - - // Create the cached entry for this node - entry := &cachedNode{ - node: simplifyNode(node), - size: uint16(size), - flushPrev: db.newest, - } - entry.forChilds(func(child common.Hash) { - if c := db.dirties[child]; c != nil { - c.parents++ - } - }) - db.dirties[hash] = entry - - // Update the flush-list endpoints - if db.oldest == (common.Hash{}) { - db.oldest, db.newest = hash, hash + if config.PathDB != nil { + db.backend = pathdb.New(diskdb, config.PathDB) } else { - db.dirties[db.newest].flushNext, db.newest = hash, hash + // Use hashdb by default + db.backend = hashdb.New(diskdb, config.HashDB, mptResolver{}) } - db.dirtiesSize += common.StorageSize(common.HashLength + entry.size) + return db } -// insertPreimage writes a new trie node pre-image to the memory database if it's -// yet unknown. The method will NOT make a copy of the slice, -// only use if the preimage will NOT be changed later on. -// -// Note, this method assumes that the database's lock is held! -func (db *Database) insertPreimage(hash common.Hash, preimage []byte) { - // Short circuit if preimage collection is disabled - if db.preimages == nil { - return +// Reader returns a reader for accessing all trie nodes with provided state root. +// Nil is returned in case the state is not available. +func (db *Database) Reader(blockRoot common.Hash) (Reader, error) { + switch b := db.backend.(type) { + case *hashdb.Database: + return b.Reader(blockRoot) + case *pathdb.Database: + return b.Reader(blockRoot) } - // Track the preimage if a yet unknown one - if _, ok := db.preimages[hash]; ok { - return - } - db.preimages[hash] = preimage - db.preimagesSize += common.StorageSize(common.HashLength + len(preimage)) + return nil, errors.New("unsupported") } -// node retrieves a cached trie node from memory, or returns nil if none can be -// found in the memory cache. -func (db *Database) node(hash common.Hash) node { - // Retrieve the node from the clean cache if available - if db.cleans != nil { - if enc := db.cleans.Get(nil, hash[:]); enc != nil { - memcacheCleanHitMeter.Mark(1) - memcacheCleanReadMeter.Mark(int64(len(enc))) - return mustDecodeNode(hash[:], enc) - } - } - // Retrieve the node from the dirty cache if available - db.lock.RLock() - dirty := db.dirties[hash] - db.lock.RUnlock() - - if dirty != nil { - memcacheDirtyHitMeter.Mark(1) - memcacheDirtyReadMeter.Mark(int64(dirty.size)) - return dirty.obj(hash) +// Update performs a state transition by committing dirty nodes contained in the +// given set in order to update state from the specified parent to the specified +// root. The held pre-images accumulated up to this point will be flushed in case +// the size exceeds the threshold. +func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { + if db.config != nil && db.config.OnCommit != nil { + db.config.OnCommit(states) } - memcacheDirtyMissMeter.Mark(1) - - // Content unavailable in memory, attempt to retrieve from disk - enc, err := db.diskdb.Get(hash[:]) - if err != nil || enc == nil { - return nil - } - if db.cleans != nil { - db.cleans.Set(hash[:], enc) - memcacheCleanMissMeter.Mark(1) - memcacheCleanWriteMeter.Mark(int64(len(enc))) - } - return mustDecodeNode(hash[:], enc) -} - -// Node retrieves an encoded cached trie node from memory. If it cannot be found -// cached, the method queries the persistent database for the content. -func (db *Database) Node(hash common.Hash) ([]byte, error) { - // It doesn't make sense to retrieve the metaroot - if hash == (common.Hash{}) { - return nil, errors.New("not found") - } - // Retrieve the node from the clean cache if available - if db.cleans != nil { - if enc := db.cleans.Get(nil, hash[:]); enc != nil { - memcacheCleanHitMeter.Mark(1) - memcacheCleanReadMeter.Mark(int64(len(enc))) - return enc, nil - } - } - // Retrieve the node from the dirty cache if available - db.lock.RLock() - dirty := db.dirties[hash] - db.lock.RUnlock() - - if dirty != nil { - memcacheDirtyHitMeter.Mark(1) - memcacheDirtyReadMeter.Mark(int64(dirty.size)) - return dirty.rlp(), nil - } - memcacheDirtyMissMeter.Mark(1) - - // Content unavailable in memory, attempt to retrieve from disk - enc := rawdb.ReadTrieNode(db.diskdb, hash) - if len(enc) != 0 { - if db.cleans != nil { - db.cleans.Set(hash[:], enc) - memcacheCleanMissMeter.Mark(1) - memcacheCleanWriteMeter.Mark(int64(len(enc))) - } - return enc, nil + if db.preimages != nil { + db.preimages.commit(false) } - return nil, errors.New("not found") + return db.backend.Update(root, parent, block, nodes, states) } -// preimage retrieves a cached trie node pre-image from memory. If it cannot be -// found cached, the method queries the persistent database for the content. -func (db *Database) preimage(hash common.Hash) []byte { - // Short circuit if preimage collection is disabled - if db.preimages == nil { - return nil - } - // Retrieve the node from cache if available - db.lock.RLock() - preimage := db.preimages[hash] - db.lock.RUnlock() - - if preimage != nil { - return preimage +// Commit iterates over all the children of a particular node, writes them out +// to disk. As a side effect, all pre-images accumulated up to this point are +// also written. +func (db *Database) Commit(root common.Hash, report bool) error { + if db.preimages != nil { + db.preimages.commit(true) } - return rawdb.ReadPreimage(db.diskdb, hash) + return db.backend.Commit(root, report) } -// Nodes retrieves the hashes of all the nodes cached within the memory database. -// This method is extremely expensive and should only be used to validate internal -// states in test code. -func (db *Database) Nodes() []common.Hash { - db.lock.RLock() - defer db.lock.RUnlock() - - var hashes = make([]common.Hash, 0, len(db.dirties)) - for hash := range db.dirties { - if hash != (common.Hash{}) { // Special case for "root" references/nodes - hashes = append(hashes, hash) - } +// Size returns the storage size of dirty trie nodes in front of the persistent +// database and the size of cached preimages. +func (db *Database) Size() (common.StorageSize, common.StorageSize) { + var ( + storages common.StorageSize + preimages common.StorageSize + ) + storages = db.backend.Size() + if db.preimages != nil { + preimages = db.preimages.size() } - return hashes + return storages, preimages } -// Reference adds a new reference from a parent node to a child node. -// This function is used to add reference between internal trie node -// and external node(e.g. storage trie root), all internal trie nodes -// are referenced together by database itself. -func (db *Database) Reference(child common.Hash, parent common.Hash) { - db.lock.Lock() - defer db.lock.Unlock() - - db.reference(child, parent) +// Initialized returns an indicator if the state data is already initialized +// according to the state scheme. +func (db *Database) Initialized(genesisRoot common.Hash) bool { + return db.backend.Initialized(genesisRoot) } -// reference is the private locked version of Reference. -func (db *Database) reference(child common.Hash, parent common.Hash) { - // If the node does not exist, it's a node pulled from disk, skip - node, ok := db.dirties[child] - if !ok { - return - } - // If the reference already exists, only duplicate for roots - if db.dirties[parent].children == nil { - db.dirties[parent].children = make(map[common.Hash]uint16) - db.childrenSize += cachedNodeChildrenSize - } else if _, ok = db.dirties[parent].children[child]; ok && parent != (common.Hash{}) { - return - } - node.parents++ - db.dirties[parent].children[child]++ - if db.dirties[parent].children[child] == 1 { - db.childrenSize += common.HashLength + 2 // uint16 counter - } +// Scheme returns the node scheme used in the database. +func (db *Database) Scheme() string { + return db.backend.Scheme() } -// Dereference removes an existing reference from a root node. -func (db *Database) Dereference(root common.Hash) { - // Sanity check to ensure that the meta-root is not removed - if root == (common.Hash{}) { - log.Error("Attempted to dereference the trie cache meta root") - return - } - db.lock.Lock() - defer db.lock.Unlock() - - nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now() - db.dereference(root, common.Hash{}) - - db.gcnodes += uint64(nodes - len(db.dirties)) - db.gcsize += storage - db.dirtiesSize - db.gctime += time.Since(start) - - memcacheGCTimeTimer.Update(time.Since(start)) - memcacheGCSizeMeter.Mark(int64(storage - db.dirtiesSize)) - memcacheGCNodesMeter.Mark(int64(nodes - len(db.dirties))) - - log.Debug("Dereferenced trie from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start), - "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize) +// DiskDB retrieves the persistent storage backing the trie database. +func (db *Database) DiskDB() ethdb.KeyValueStore { + return db.backend.DiskDB() } -// dereference is the private locked version of Dereference. -func (db *Database) dereference(child common.Hash, parent common.Hash) { - // Dereference the parent-child - node := db.dirties[parent] - - if node.children != nil && node.children[child] > 0 { - node.children[child]-- - if node.children[child] == 0 { - delete(node.children, child) - db.childrenSize -= (common.HashLength + 2) // uint16 counter - } - } - // If the child does not exist, it's a previously committed node. - node, ok := db.dirties[child] - if !ok { - return - } - // If there are no more references to the child, delete it and cascade - if node.parents > 0 { - // This is a special cornercase where a node loaded from disk (i.e. not in the - // memcache any more) gets reinjected as a new node (short node split into full, - // then reverted into short), causing a cached node to have no parents. That is - // no problem in itself, but don't make maxint parents out of it. - node.parents-- - } - if node.parents == 0 { - // Remove the node from the flush-list - switch child { - case db.oldest: - db.oldest = node.flushNext - db.dirties[node.flushNext].flushPrev = common.Hash{} - case db.newest: - db.newest = node.flushPrev - db.dirties[node.flushPrev].flushNext = common.Hash{} - default: - db.dirties[node.flushPrev].flushNext = node.flushNext - db.dirties[node.flushNext].flushPrev = node.flushPrev - } - // Dereference all children and delete the node - node.forChilds(func(hash common.Hash) { - db.dereference(hash, child) - }) - delete(db.dirties, child) - db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) - if node.children != nil { - db.childrenSize -= cachedNodeChildrenSize - } +// Close flushes the dangling preimages to disk and closes the trie database. +// It is meant to be called when closing the blockchain object, so that all +// resources held can be released correctly. +func (db *Database) Close() error { + if db.preimages != nil { + db.preimages.commit(true) } + return db.backend.Close() } // Cap iteratively flushes old but still referenced trie nodes until the total -// memory usage goes below the given threshold. +// memory usage goes below the given threshold. The held pre-images accumulated +// up to this point will be flushed in case the size exceeds the threshold. // -// Note, this method is a non-synchronized mutator. It is unsafe to call this -// concurrently with other mutators. +// It's only supported by hash-based database and will return an error for others. func (db *Database) Cap(limit common.StorageSize) error { - // Create a database batch to flush persistent data out. It is important that - // outside code doesn't see an inconsistent state (referenced data removed from - // memory cache during commit but not yet in persistent storage). This is ensured - // by only uncaching existing data when the database write finalizes. - nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now() - batch := db.diskdb.NewBatch() - - // db.dirtiesSize only contains the useful data in the cache, but when reporting - // the total memory consumption, the maintenance metadata is also needed to be - // counted. - size := db.dirtiesSize + common.StorageSize((len(db.dirties)-1)*cachedNodeSize) - size += db.childrenSize - common.StorageSize(len(db.dirties[common.Hash{}].children)*(common.HashLength+2)) - - // If the preimage cache got large enough, push to disk. If it's still small - // leave for later to deduplicate writes. - flushPreimages := db.preimagesSize > 4*1024*1024 - if flushPreimages { - if db.preimages == nil { - log.Error("Attempted to write preimages whilst disabled") - } else { - rawdb.WritePreimages(batch, db.preimages) - if batch.ValueSize() > ethdb.IdealBatchSize { - if err := batch.Write(); err != nil { - return err - } - batch.Reset() - } - } - } - // Keep committing nodes from the flush-list until we're below allowance - oldest := db.oldest - for size > limit && oldest != (common.Hash{}) { - // Fetch the oldest referenced node and push into the batch - node := db.dirties[oldest] - rawdb.WriteTrieNode(batch, oldest, node.rlp()) - - // If we exceeded the ideal batch size, commit and reset - if batch.ValueSize() >= ethdb.IdealBatchSize { - if err := batch.Write(); err != nil { - log.Error("Failed to write flush list to disk", "err", err) - return err - } - batch.Reset() - } - // Iterate to the next flush item, or abort if the size cap was achieved. Size - // is the total size, including the useful cached data (hash -> blob), the - // cache item metadata, as well as external children mappings. - size -= common.StorageSize(common.HashLength + int(node.size) + cachedNodeSize) - if node.children != nil { - size -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2)) - } - oldest = node.flushNext - } - // Flush out any remainder data from the last batch - if err := batch.Write(); err != nil { - log.Error("Failed to write flush list to disk", "err", err) - return err - } - // Write successful, clear out the flushed data - db.lock.Lock() - defer db.lock.Unlock() - - if flushPreimages { - if db.preimages == nil { - log.Error("Attempted to reset preimage cache whilst disabled") - } else { - db.preimages, db.preimagesSize = make(map[common.Hash][]byte), 0 - } - } - for db.oldest != oldest { - node := db.dirties[db.oldest] - delete(db.dirties, db.oldest) - db.oldest = node.flushNext - - db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) - if node.children != nil { - db.childrenSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2)) - } + hdb, ok := db.backend.(*hashdb.Database) + if !ok { + return errors.New("not supported") } - if db.oldest != (common.Hash{}) { - db.dirties[db.oldest].flushPrev = common.Hash{} + if db.preimages != nil { + db.preimages.commit(false) } - db.flushnodes += uint64(nodes - len(db.dirties)) - db.flushsize += storage - db.dirtiesSize - db.flushtime += time.Since(start) - - memcacheFlushTimeTimer.Update(time.Since(start)) - memcacheFlushSizeMeter.Mark(int64(storage - db.dirtiesSize)) - memcacheFlushNodesMeter.Mark(int64(nodes - len(db.dirties))) - - log.Debug("Persisted nodes from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start), - "flushnodes", db.flushnodes, "flushsize", db.flushsize, "flushtime", db.flushtime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize) - - return nil + return hdb.Cap(limit) } -// Commit iterates over all the children of a particular node, writes them out -// to disk, forcefully tearing down all references in both directions. As a side -// effect, all pre-images accumulated up to this point are also written. +// Reference adds a new reference from a parent node to a child node. This function +// is used to add reference between internal trie node and external node(e.g. storage +// trie root), all internal trie nodes are referenced together by database itself. // -// Note, this method is a non-synchronized mutator. It is unsafe to call this -// concurrently with other mutators. -func (db *Database) Commit(node common.Hash, report bool, callback func(common.Hash)) error { - // Create a database batch to flush persistent data out. It is important that - // outside code doesn't see an inconsistent state (referenced data removed from - // memory cache during commit but not yet in persistent storage). This is ensured - // by only uncaching existing data when the database write finalizes. - start := time.Now() - batch := db.diskdb.NewBatch() - - // Move all of the accumulated preimages into a write batch - if db.preimages != nil { - rawdb.WritePreimages(batch, db.preimages) - // Since we're going to replay trie node writes into the clean cache, flush out - // any batched pre-images before continuing. - if err := batch.Write(); err != nil { - return err - } - batch.Reset() - } - // Move the trie itself into the batch, flushing if enough data is accumulated - nodes, storage := len(db.dirties), db.dirtiesSize - - uncacher := &cleaner{db} - if err := db.commit(node, batch, uncacher, callback); err != nil { - log.Error("Failed to commit trie from trie database", "err", err) - return err - } - // Trie mostly committed to disk, flush any batch leftovers - if err := batch.Write(); err != nil { - log.Error("Failed to write trie to disk", "err", err) - return err - } - // Uncache any leftovers in the last batch - db.lock.Lock() - defer db.lock.Unlock() - - batch.Replay(uncacher) - batch.Reset() - - // Reset the storage counters and bumped metrics - if db.preimages != nil { - db.preimages, db.preimagesSize = make(map[common.Hash][]byte), 0 - } - memcacheCommitTimeTimer.Update(time.Since(start)) - memcacheCommitSizeMeter.Mark(int64(storage - db.dirtiesSize)) - memcacheCommitNodesMeter.Mark(int64(nodes - len(db.dirties))) - - logger := log.Info - if !report { - logger = log.Debug +// It's only supported by hash-based database and will return an error for others. +func (db *Database) Reference(root common.Hash, parent common.Hash) error { + hdb, ok := db.backend.(*hashdb.Database) + if !ok { + return errors.New("not supported") } - logger("Persisted trie from memory database", "nodes", nodes-len(db.dirties)+int(db.flushnodes), "size", storage-db.dirtiesSize+db.flushsize, "time", time.Since(start)+db.flushtime, - "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize) - - // Reset the garbage collection statistics - db.gcnodes, db.gcsize, db.gctime = 0, 0, 0 - db.flushnodes, db.flushsize, db.flushtime = 0, 0, 0 - + hdb.Reference(root, parent) return nil } -// commit is the private locked version of Commit. -func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleaner, callback func(common.Hash)) error { - // If the node does not exist, it's a previously committed node - node, ok := db.dirties[hash] +// Dereference removes an existing reference from a root node. It's only +// supported by hash-based database and will return an error for others. +func (db *Database) Dereference(root common.Hash) error { + hdb, ok := db.backend.(*hashdb.Database) if !ok { - return nil - } - var err error - node.forChilds(func(child common.Hash) { - if err == nil { - err = db.commit(child, batch, uncacher, callback) - } - }) - if err != nil { - return err - } - // If we've reached an optimal batch size, commit and start over - rawdb.WriteTrieNode(batch, hash, node.rlp()) - if callback != nil { - callback(hash) - } - if batch.ValueSize() >= ethdb.IdealBatchSize { - if err := batch.Write(); err != nil { - return err - } - db.lock.Lock() - batch.Replay(uncacher) - batch.Reset() - db.lock.Unlock() + return errors.New("not supported") } + hdb.Dereference(root) return nil } -// cleaner is a database batch replayer that takes a batch of write operations -// and cleans up the trie database from anything written to disk. -type cleaner struct { - db *Database -} - -// Put reacts to database writes and implements dirty data uncaching. This is the -// post-processing step of a commit operation where the already persisted trie is -// removed from the dirty cache and moved into the clean cache. The reason behind -// the two-phase commit is to ensure data availability while moving from memory -// to disk. -func (c *cleaner) Put(key []byte, rlp []byte) error { - hash := common.BytesToHash(key) - - // If the node does not exist, we're done on this path - node, ok := c.db.dirties[hash] +// Node retrieves the rlp-encoded node blob with provided node hash. It's +// only supported by hash-based database and will return an error for others. +// Note, this function should be deprecated once ETH66 is deprecated. +func (db *Database) Node(hash common.Hash) ([]byte, error) { + hdb, ok := db.backend.(*hashdb.Database) if !ok { - return nil + return nil, errors.New("not supported") } - // Node still exists, remove it from the flush-list - switch hash { - case c.db.oldest: - c.db.oldest = node.flushNext - c.db.dirties[node.flushNext].flushPrev = common.Hash{} - case c.db.newest: - c.db.newest = node.flushPrev - c.db.dirties[node.flushPrev].flushNext = common.Hash{} - default: - c.db.dirties[node.flushPrev].flushNext = node.flushNext - c.db.dirties[node.flushNext].flushPrev = node.flushPrev - } - // Remove the node from the dirty cache - delete(c.db.dirties, hash) - c.db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) - if node.children != nil { - c.db.dirtiesSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2)) - } - // Move the flushed node into the clean cache to prevent insta-reloads - if c.db.cleans != nil { - c.db.cleans.Set(hash[:], rlp) - memcacheCleanWriteMeter.Mark(int64(len(rlp))) - } - return nil + return hdb.Node(hash) } -func (c *cleaner) Delete(key []byte) error { - panic("not implemented") +// Recover rollbacks the database to a specified historical point. The state is +// supported as the rollback destination only if it's canonical state and the +// corresponding trie histories are existent. It's only supported by path-based +// database and will return an error for others. +func (db *Database) Recover(target common.Hash) error { + pdb, ok := db.backend.(*pathdb.Database) + if !ok { + return errors.New("not supported") + } + return pdb.Recover(target, &trieLoader{db: db}) } -// Size returns the current storage size of the memory cache in front of the -// persistent database layer. -func (db *Database) Size() (common.StorageSize, common.StorageSize) { - db.lock.RLock() - defer db.lock.RUnlock() - - // db.dirtiesSize only contains the useful data in the cache, but when reporting - // the total memory consumption, the maintenance metadata is also needed to be - // counted. - var metadataSize = common.StorageSize((len(db.dirties) - 1) * cachedNodeSize) - var metarootRefs = common.StorageSize(len(db.dirties[common.Hash{}].children) * (common.HashLength + 2)) - return db.dirtiesSize + db.childrenSize + metadataSize - metarootRefs, db.preimagesSize +// Recoverable returns the indicator if the specified state is enabled to be +// recovered. It's only supported by path-based database and will return an +// error for others. +func (db *Database) Recoverable(root common.Hash) (bool, error) { + pdb, ok := db.backend.(*pathdb.Database) + if !ok { + return false, errors.New("not supported") + } + return pdb.Recoverable(root), nil } -// saveCache saves clean state cache to given directory path -// using specified CPU cores. -func (db *Database) saveCache(dir string, threads int) error { - if db.cleans == nil { - return nil +// Disable deactivates the database and invalidates all available state layers +// as stale to prevent access to the persistent state, which is in the syncing +// stage. +// +// It's only supported by path-based database and will return an error for others. +func (db *Database) Disable() error { + pdb, ok := db.backend.(*pathdb.Database) + if !ok { + return errors.New("not supported") } - log.Info("Writing clean trie cache to disk", "path", dir, "threads", threads) + return pdb.Disable() +} - start := time.Now() - err := db.cleans.SaveToFileConcurrent(dir, threads) - if err != nil { - log.Error("Failed to persist clean trie cache", "error", err) - return err +// Enable activates database and resets the state tree with the provided persistent +// state root once the state sync is finished. +func (db *Database) Enable(root common.Hash) error { + pdb, ok := db.backend.(*pathdb.Database) + if !ok { + return errors.New("not supported") } - log.Info("Persisted the clean trie cache", "path", dir, "elapsed", common.PrettyDuration(time.Since(start))) - return nil + return pdb.Enable(root) } -// SaveCache atomically saves fast cache data to the given dir using all -// available CPU cores. -func (db *Database) SaveCache(dir string) error { - return db.saveCache(dir, runtime.GOMAXPROCS(0)) +// Journal commits an entire diff hierarchy to disk into a single journal entry. +// This is meant to be used during shutdown to persist the snapshot without +// flattening everything down (bad for reorgs). It's only supported by path-based +// database and will return an error for others. +func (db *Database) Journal(root common.Hash) error { + pdb, ok := db.backend.(*pathdb.Database) + if !ok { + return errors.New("not supported") + } + return pdb.Journal(root) } -// SaveCachePeriodically atomically saves fast cache data to the given dir with -// the specified interval. All dump operation will only use a single CPU core. -func (db *Database) SaveCachePeriodically(dir string, interval time.Duration, stopCh <-chan struct{}) { - ticker := time.NewTicker(interval) - defer ticker.Stop() - - for { - select { - case <-ticker.C: - db.saveCache(dir, 1) - case <-stopCh: - return - } +// SetBufferSize sets the node buffer size to the provided value(in bytes). +// It's only supported by path-based database and will return an error for +// others. +func (db *Database) SetBufferSize(size int) error { + pdb, ok := db.backend.(*pathdb.Database) + if !ok { + return errors.New("not supported") } + return pdb.SetBufferSize(size) } diff --git a/trie/database_test.go b/trie/database_test.go index 81c469500f..e456bf7177 100644 --- a/trie/database_test.go +++ b/trie/database_test.go @@ -17,17 +17,20 @@ package trie import ( - "testing" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" ) -// Tests that the trie database returns a missing trie node error if attempting -// to retrieve the meta root. -func TestDatabaseMetarootFetch(t *testing.T) { - db := NewDatabase(memorydb.New()) - if _, err := db.Node(common.Hash{}); err == nil { - t.Fatalf("metaroot retrieval succeeded") +// newTestDatabase initializes the trie database with specified scheme. + +func newTestDatabase(diskdb ethdb.Database, scheme string) *Database { + db := prepare(diskdb, nil) + if scheme == rawdb.HashScheme { + db.backend = hashdb.New(diskdb, &hashdb.Config{}, mptResolver{}) + } else { + db.backend = pathdb.New(diskdb, &pathdb.Config{}) // disable clean/dirty cache } + return db } diff --git a/trie/encoding_test.go b/trie/encoding_test.go index 16393313f7..e8fe4f3c6b 100644 --- a/trie/encoding_test.go +++ b/trie/encoding_test.go @@ -78,17 +78,17 @@ func TestHexKeybytes(t *testing.T) { } func TestHexToCompactInPlace(t *testing.T) { - for i, keyS := range []string{ + for i, key := range []string{ "00", "060a040c0f000a090b040803010801010900080d090a0a0d0903000b10", "10", } { - hexBytes, _ := hex.DecodeString(keyS) + hexBytes, _ := hex.DecodeString(key) exp := hexToCompact(hexBytes) sz := hexToCompactInPlace(hexBytes) got := hexBytes[:sz] if !bytes.Equal(exp, got) { - t.Fatalf("test %d: encoding err\ninp %v\ngot %x\nexp %x\n", i, keyS, got, exp) + t.Fatalf("test %d: encoding err\ninp %v\ngot %x\nexp %x\n", i, key, got, exp) } } } diff --git a/trie/errors.go b/trie/errors.go index 567b80078c..f614dd30ad 100644 --- a/trie/errors.go +++ b/trie/errors.go @@ -17,19 +17,36 @@ package trie import ( + "errors" "fmt" "github.com/ethereum/go-ethereum/common" ) +// ErrCommitted is returned when a already committed trie is requested for usage. +// The potential usages can be `Get`, `Update`, `Delete`, `NodeIterator`, `Prove` +// and so on. +var ErrCommitted = errors.New("trie is already committed") + // MissingNodeError is returned by the trie functions (TryGet, TryUpdate, TryDelete) // in the case where a trie node is not present in the local database. It contains // information necessary for retrieving the missing node. type MissingNodeError struct { + Owner common.Hash // owner of the trie if it's 2-layered trie NodeHash common.Hash // hash of the missing node Path []byte // hex-encoded path to the missing node + err error // concrete error for missing trie node +} + +// Unwrap returns the concrete error for missing trie node which +// allows us for further analysis outside. +func (err *MissingNodeError) Unwrap() error { + return err.err } func (err *MissingNodeError) Error() string { - return fmt.Sprintf("missing trie node %x (path %x)", err.NodeHash, err.Path) + if err.Owner == (common.Hash{}) { + return fmt.Sprintf("missing trie node %x (path %x) %v", err.NodeHash, err.Path, err.err) + } + return fmt.Sprintf("missing trie node %x (owner %x) (path %x) %v", err.NodeHash, err.Owner, err.Path, err.err) } diff --git a/trie/hasher.go b/trie/hasher.go index 3a62a2f119..7f0748c13d 100644 --- a/trie/hasher.go +++ b/trie/hasher.go @@ -24,22 +24,12 @@ import ( "golang.org/x/crypto/sha3" ) -type sliceBuffer []byte - -func (b *sliceBuffer) Write(data []byte) (n int, err error) { - *b = append(*b, data...) - return len(data), nil -} - -func (b *sliceBuffer) Reset() { - *b = (*b)[:0] -} - // hasher is a type used for the trie Hash operation. A hasher has some // internal preallocated temp space type hasher struct { sha crypto.KeccakState - tmp sliceBuffer + tmp []byte + encbuf rlp.EncoderBuffer parallel bool // Whether to use paralallel threads when hashing } @@ -47,8 +37,9 @@ type hasher struct { var hasherPool = sync.Pool{ New: func() interface{} { return &hasher{ - tmp: make(sliceBuffer, 0, 550), // cap is as large as a full fullNode. - sha: sha3.NewLegacyKeccak256().(crypto.KeccakState), + tmp: make([]byte, 0, 550), // cap is as large as a full fullNode. + sha: sha3.NewLegacyKeccak256().(crypto.KeccakState), + encbuf: rlp.NewEncoderBuffer(nil), } }, } @@ -153,30 +144,41 @@ func (h *hasher) hashFullNodeChildren(n *fullNode) (collapsed *fullNode, cached // into compact form for RLP encoding. // If the rlp data is smaller than 32 bytes, `nil` is returned. func (h *hasher) shortnodeToHash(n *shortNode, force bool) node { - h.tmp.Reset() - if err := rlp.Encode(&h.tmp, n); err != nil { - panic("encode error: " + err.Error()) - } + n.encode(h.encbuf) + enc := h.encodedBytes() - if len(h.tmp) < 32 && !force { + if len(enc) < 32 && !force { return n // Nodes smaller than 32 bytes are stored inside their parent } - return h.hashData(h.tmp) + return h.hashData(enc) } // shortnodeToHash is used to creates a hashNode from a set of hashNodes, (which // may contain nil values) func (h *hasher) fullnodeToHash(n *fullNode, force bool) node { - h.tmp.Reset() - // Generate the RLP encoding of the node - if err := n.EncodeRLP(&h.tmp); err != nil { - panic("encode error: " + err.Error()) - } + n.encode(h.encbuf) + enc := h.encodedBytes() - if len(h.tmp) < 32 && !force { + if len(enc) < 32 && !force { return n // Nodes smaller than 32 bytes are stored inside their parent } - return h.hashData(h.tmp) + return h.hashData(enc) +} + +// encodedBytes returns the result of the last encoding operation on h.encbuf. +// This also resets the encoder buffer. +// +// All node encoding must be done like this: +// +// node.encode(h.encbuf) +// enc := h.encodedBytes() +// +// This convention exists because node.encode can only be inlined/escape-analyzed when +// called on a concrete receiver type. +func (h *hasher) encodedBytes() []byte { + h.tmp = h.encbuf.AppendToBytes(h.tmp[:0]) + h.encbuf.Reset(nil) + return h.tmp } // hashData hashes the provided data diff --git a/trie/iterator.go b/trie/iterator.go index 81316c4551..92f64a4c07 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -22,10 +22,15 @@ import ( "errors" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/ethdb" - "github.com/ethereum/go-ethereum/rlp" ) +// NodeResolver is used for looking up trie nodes before reaching into the real +// persistent layer. This is not mandatory, rather is an optimization for cases +// where trie nodes can be recovered from some external mechanism without reading +// from disk. In those cases, this resolver allows short circuiting accesses and +// returning them from memory. (It should be supported for multiple schemes in iterator) +type NodeResolver func(owner common.Hash, path []byte, hash common.Hash) []byte + // Iterator is a key-value trie iterator that traverses a Trie. type Iterator struct { nodeIt NodeIterator @@ -86,6 +91,10 @@ type NodeIterator interface { // For leaf nodes, the last element of the path is the 'terminator symbol' 0x10. Path() []byte + // NodeBlob returns the rlp-encoded value of the current iterated node. + // If the node is an embedded node in its parent, nil is returned then. + NodeBlob() []byte + // Leaf returns true iff the current node is a leaf node. Leaf() bool @@ -104,8 +113,8 @@ type NodeIterator interface { // to the value after calling Next. LeafProof() [][]byte - // AddResolver sets an intermediate database to use for looking up trie nodes - // before reaching into the real persistent layer. + // AddResolver sets a node resolver to use for looking up trie nodes before + // reaching into the real persistent layer. // // This is not required for normal operation, rather is an optimization for // cases where trie nodes can be recovered from some external mechanism without @@ -115,7 +124,7 @@ type NodeIterator interface { // Before adding a similar mechanism to any other place in Geth, consider // making trie.Database an interface and wrapping at that level. It's a huge // refactor, but it could be worth it if another occurrence arises. - AddResolver(ethdb.KeyValueStore) + AddResolver(NodeResolver) } // nodeIteratorState represents the iteration state at one particular node of the @@ -134,7 +143,7 @@ type nodeIterator struct { path []byte // Path to the current node err error // Failure set in case of an internal error in the iterator - resolver ethdb.KeyValueStore // Optional intermediate resolver above the disk layer + resolver NodeResolver // optional node resolver for avoiding disk hits } // errIteratorEnd is stored in nodeIterator.err when iteration is done. @@ -162,7 +171,7 @@ func newNodeIterator(trie *Trie, start []byte) NodeIterator { return it } -func (it *nodeIterator) AddResolver(resolver ethdb.KeyValueStore) { +func (it *nodeIterator) AddResolver(resolver NodeResolver) { it.resolver = resolver } @@ -213,8 +222,7 @@ func (it *nodeIterator) LeafProof() [][]byte { // Gather nodes that end up as hash nodes (or the root) node, hashed := hasher.proofHash(item.node) if _, ok := hashed.(hashNode); ok || i == 0 { - enc, _ := rlp.EncodeToBytes(node) - proofs = append(proofs, enc) + proofs = append(proofs, nodeToBytes(node)) } } return proofs @@ -227,6 +235,18 @@ func (it *nodeIterator) Path() []byte { return it.path } +func (it *nodeIterator) NodeBlob() []byte { + if it.Hash() == (common.Hash{}) { + return nil // skip the non-standalone node + } + blob, err := it.resolveBlob(it.Hash().Bytes(), it.Path()) + if err != nil { + it.err = err + return nil + } + return blob +} + func (it *nodeIterator) Error() error { if it.err == errIteratorEnd { return nil @@ -355,14 +375,37 @@ func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []by func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) { if it.resolver != nil { - if blob, err := it.resolver.Get(hash); err == nil && len(blob) > 0 { + // Support hash/path from memory. + if blob := it.resolver(it.trie.owner, path, common.BytesToHash(hash)); len(blob) > 0 { if resolved, err := decodeNode(hash, blob); err == nil { return resolved, nil } } } - resolved, err := it.trie.resolveHash(hash, path) - return resolved, err + + // Retrieve the specified node from the underlying node reader. + blob, err := it.trie.reader.node(path, common.BytesToHash(hash)) + if err != nil { + return nil, err + } + // The raw-blob format nodes are loaded either from the + // clean cache or the database, they are all in their own + // copy and safe to use unsafe decoder. + return mustDecodeNode(hash, blob), nil +} + +func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) { + if it.resolver != nil { + if blob := it.resolver(it.trie.owner, path, common.BytesToHash(hash)); len(blob) > 0 { + return blob, nil + } + } + // Retrieve the specified node from the underlying node reader. + // it.trie.resolveAndTrack is not used since in that function the + // loaded blob will be tracked, while it's not required here since + // all loaded nodes won't be linked to trie at all and track nodes + // may lead to out-of-memory issue. + return it.trie.reader.node(path, common.BytesToHash(hash)) } func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error { @@ -553,7 +596,11 @@ func (it *differenceIterator) Path() []byte { return it.b.Path() } -func (it *differenceIterator) AddResolver(resolver ethdb.KeyValueStore) { +func (it *differenceIterator) NodeBlob() []byte { + return it.b.NodeBlob() +} + +func (it *differenceIterator) AddResolver(resolver NodeResolver) { panic("not implemented") } @@ -664,7 +711,11 @@ func (it *unionIterator) Path() []byte { return (*it.items)[0].Path() } -func (it *unionIterator) AddResolver(resolver ethdb.KeyValueStore) { +func (it *unionIterator) NodeBlob() []byte { + return (*it.items)[0].NodeBlob() +} + +func (it *unionIterator) AddResolver(resolver NodeResolver) { panic("not implemented") } diff --git a/trie/iterator_test.go b/trie/iterator_test.go index 1f984c0f4b..f173e3d45b 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -24,14 +24,17 @@ import ( "testing" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/trie/trienode" ) func TestEmptyIterator(t *testing.T) { - trie := newEmpty() - iter := trie.NodeIterator(nil) + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) + iter := trie.MustNodeIterator(nil) seen := make(map[string]struct{}) for iter.Next(true) { @@ -43,7 +46,8 @@ func TestEmptyIterator(t *testing.T) { } func TestIterator(t *testing.T) { - trie := newEmpty() + db := NewDatabase(rawdb.NewMemoryDatabase(), nil) + trie := NewEmpty(db) vals := []struct{ k, v string }{ {"do", "verb"}, {"ether", "wookiedoo"}, @@ -58,10 +62,14 @@ func TestIterator(t *testing.T) { all[val.k] = val.v trie.Update([]byte(val.k), []byte(val.v)) } - trie.Commit(nil) - + root, nodes, err := trie.Commit(false) + if err != nil { + t.Fatalf("Failed to commit trie %v", err) + } + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + trie, _ = New(TrieID(root), db) found := make(map[string]string) - it := NewIterator(trie.NodeIterator(nil)) + it := NewIterator(trie.MustNodeIterator(nil)) for it.Next() { found[string(it.Key)] = string(it.Value) } @@ -78,8 +86,12 @@ type kv struct { t bool } +func (k *kv) cmp(other *kv) int { + return bytes.Compare(k.k, other.k) +} + func TestIteratorLargeData(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) vals := make(map[string]*kv) for i := byte(0); i < 255; i++ { @@ -91,7 +103,7 @@ func TestIteratorLargeData(t *testing.T) { vals[string(value2.k)] = value2 } - it := NewIterator(trie.NodeIterator(nil)) + it := NewIterator(trie.MustNodeIterator(nil)) for it.Next() { vals[string(it.Key)].t = true } @@ -111,39 +123,66 @@ func TestIteratorLargeData(t *testing.T) { } } -// Tests that the node iterator indeed walks over the entire database contents. +type iterationElement struct { + hash common.Hash + path []byte + blob []byte +} + func TestNodeIteratorCoverage(t *testing.T) { + + testNodeIteratorCoverage(t, rawdb.HashScheme) + testNodeIteratorCoverage(t, rawdb.PathScheme) +} + +func testNodeIteratorCoverage(t *testing.T, scheme string) { // Create some arbitrary test trie to iterate - db, trie, _ := makeTestTrie() + db, nodeDb, trie, _ := makeTestTrie(scheme) // Gather all the node hashes found by the iterator - hashes := make(map[common.Hash]struct{}) - for it := trie.NodeIterator(nil); it.Next(true); { + var elements = make(map[common.Hash]iterationElement) + for it := trie.MustNodeIterator(nil); it.Next(true); { if it.Hash() != (common.Hash{}) { - hashes[it.Hash()] = struct{}{} + elements[it.Hash()] = iterationElement{ + hash: it.Hash(), + path: common.CopyBytes(it.Path()), + blob: common.CopyBytes(it.NodeBlob()), + } } } // Cross check the hashes and the database itself - for hash := range hashes { - if _, err := db.Node(hash); err != nil { - t.Errorf("failed to retrieve reported node %x: %v", hash, err) + for _, element := range elements { + reader, err := nodeDb.Reader(trie.Hash()) + if err != nil { + t.Errorf("failed to retrieve reader %v", err) } - } - for hash, obj := range db.dirties { - if obj != nil && hash != (common.Hash{}) { - if _, ok := hashes[hash]; !ok { - t.Errorf("state entry not reported %x", hash) - } + + if blob, err := reader.Node(common.Hash{}, element.path, element.hash); err != nil { + t.Errorf("failed to retrieve reported node %x: %v", element.hash, err) + } else if !bytes.Equal(blob, element.blob) { + t.Errorf("node blob is different, want %v got %v", element.blob, blob) } } - it := db.diskdb.NewIterator(nil, nil) + var ( + count int + it = db.NewIterator(nil, nil) + ) for it.Next() { - key := it.Key() - if _, ok := hashes[common.BytesToHash(key)]; !ok { - t.Errorf("state entry not reported %x", key) + res, _, _ := isTrieNode(nodeDb.Scheme(), it.Key(), it.Value()) + if !res { + continue + } + count += 1 + if elem, ok := elements[crypto.Keccak256Hash(it.Value())]; !ok { + t.Error("state entry not reported") + } else if !bytes.Equal(it.Value(), elem.blob) { + t.Errorf("node blob is different, want %v got %v", elem.blob, it.Value()) } } it.Release() + if count != len(elements) { + t.Errorf("state entry is mismatched %d %d", count, len(elements)) + } } type kvs struct{ k, v string } @@ -172,25 +211,25 @@ var testdata2 = []kvs{ } func TestIteratorSeek(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) for _, val := range testdata1 { trie.Update([]byte(val.k), []byte(val.v)) } // Seek to the middle. - it := NewIterator(trie.NodeIterator([]byte("fab"))) + it := NewIterator(trie.MustNodeIterator([]byte("fab"))) if err := checkIteratorOrder(testdata1[4:], it); err != nil { t.Fatal(err) } // Seek to a non-existent key. - it = NewIterator(trie.NodeIterator([]byte("barc"))) + it = NewIterator(trie.MustNodeIterator([]byte("barc"))) if err := checkIteratorOrder(testdata1[1:], it); err != nil { t.Fatal(err) } // Seek beyond the end. - it = NewIterator(trie.NodeIterator([]byte("z"))) + it = NewIterator(trie.MustNodeIterator([]byte("z"))) if err := checkIteratorOrder(nil, it); err != nil { t.Fatal(err) } @@ -213,20 +252,26 @@ func checkIteratorOrder(want []kvs, it *Iterator) error { } func TestDifferenceIterator(t *testing.T) { - triea := newEmpty() + dba := NewDatabase(rawdb.NewMemoryDatabase(), nil) + triea := NewEmpty(dba) for _, val := range testdata1 { triea.Update([]byte(val.k), []byte(val.v)) } - triea.Commit(nil) + rootA, nodesA, _ := triea.Commit(false) + dba.Update(rootA, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodesA), nil) + triea, _ = New(TrieID(rootA), dba) - trieb := newEmpty() + dbb := NewDatabase(rawdb.NewMemoryDatabase(), nil) + trieb := NewEmpty(dbb) for _, val := range testdata2 { trieb.Update([]byte(val.k), []byte(val.v)) } - trieb.Commit(nil) + rootB, nodesB, _ := trieb.Commit(false) + dbb.Update(rootB, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodesB), nil) + trieb, _ = New(TrieID(rootB), dbb) found := make(map[string]string) - di, _ := NewDifferenceIterator(triea.NodeIterator(nil), trieb.NodeIterator(nil)) + di, _ := NewDifferenceIterator(triea.MustNodeIterator(nil), trieb.MustNodeIterator(nil)) it := NewIterator(di) for it.Next() { found[string(it.Key)] = string(it.Value) @@ -249,19 +294,25 @@ func TestDifferenceIterator(t *testing.T) { } func TestUnionIterator(t *testing.T) { - triea := newEmpty() + dba := NewDatabase(rawdb.NewMemoryDatabase(), nil) + triea := NewEmpty(dba) for _, val := range testdata1 { triea.Update([]byte(val.k), []byte(val.v)) } - triea.Commit(nil) + rootA, nodesA, _ := triea.Commit(false) + dba.Update(rootA, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodesA), nil) + triea, _ = New(TrieID(rootA), dba) - trieb := newEmpty() + dbb := NewDatabase(rawdb.NewMemoryDatabase(), nil) + trieb := NewEmpty(dbb) for _, val := range testdata2 { trieb.Update([]byte(val.k), []byte(val.v)) } - trieb.Commit(nil) + rootB, nodesB, _ := trieb.Commit(false) + dbb.Update(rootB, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodesB), nil) + trieb, _ = New(TrieID(rootB), dbb) - di, _ := NewUnionIterator([]NodeIterator{triea.NodeIterator(nil), trieb.NodeIterator(nil)}) + di, _ := NewUnionIterator([]NodeIterator{triea.MustNodeIterator(nil), trieb.MustNodeIterator(nil)}) it := NewIterator(di) all := []struct{ k, v string }{ @@ -296,86 +347,106 @@ func TestUnionIterator(t *testing.T) { } func TestIteratorNoDups(t *testing.T) { - var tr Trie + tr := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) for _, val := range testdata1 { tr.Update([]byte(val.k), []byte(val.v)) } - checkIteratorNoDups(t, tr.NodeIterator(nil), nil) + checkIteratorNoDups(t, tr.MustNodeIterator(nil), nil) } // This test checks that nodeIterator.Next can be retried after inserting missing trie nodes. -func TestIteratorContinueAfterErrorDisk(t *testing.T) { testIteratorContinueAfterError(t, false) } -func TestIteratorContinueAfterErrorMemonly(t *testing.T) { testIteratorContinueAfterError(t, true) } +func TestIteratorContinueAfterError(t *testing.T) { + testIteratorContinueAfterError(t, false, rawdb.HashScheme) + testIteratorContinueAfterError(t, true, rawdb.HashScheme) + testIteratorContinueAfterError(t, false, rawdb.PathScheme) + testIteratorContinueAfterError(t, true, rawdb.PathScheme) +} -func testIteratorContinueAfterError(t *testing.T, memonly bool) { - diskdb := memorydb.New() - triedb := NewDatabase(diskdb) +func testIteratorContinueAfterError(t *testing.T, memonly bool, scheme string) { + diskdb := rawdb.NewMemoryDatabase() + tdb := newTestDatabase(diskdb, scheme) - tr, _ := New(common.Hash{}, triedb) + tr := NewEmpty(tdb) for _, val := range testdata1 { tr.Update([]byte(val.k), []byte(val.v)) } - tr.Commit(nil) + root, nodes, _ := tr.Commit(false) + tdb.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) if !memonly { - triedb.Commit(tr.Hash(), true, nil) + tdb.Commit(root, false) } - wantNodeCount := checkIteratorNoDups(t, tr.NodeIterator(nil), nil) + tr, _ = New(TrieID(root), tdb) + wantNodeCount := checkIteratorNoDups(t, tr.MustNodeIterator(nil), nil) var ( - diskKeys [][]byte - memKeys []common.Hash + paths [][]byte + hashes []common.Hash ) if memonly { - memKeys = triedb.Nodes() + for path, n := range nodes.Nodes { + paths = append(paths, []byte(path)) + hashes = append(hashes, n.Hash) + } } else { it := diskdb.NewIterator(nil, nil) for it.Next() { - diskKeys = append(diskKeys, it.Key()) + ok, path, hash := isTrieNode(tdb.Scheme(), it.Key(), it.Value()) + if !ok { + continue + } + paths = append(paths, path) + hashes = append(hashes, hash) } it.Release() } for i := 0; i < 20; i++ { // Create trie that will load all nodes from DB. - tr, _ := New(tr.Hash(), triedb) + tr, _ := New(TrieID(tr.Hash()), tdb) // Remove a random node from the database. It can't be the root node // because that one is already loaded. var ( - rkey common.Hash - rval []byte - robj *cachedNode + rval []byte + rpath []byte + rhash common.Hash ) for { if memonly { - rkey = memKeys[rand.Intn(len(memKeys))] + rpath = paths[rand.Intn(len(paths))] + n := nodes.Nodes[string(rpath)] + if n == nil { + continue + } + rhash = n.Hash } else { - copy(rkey[:], diskKeys[rand.Intn(len(diskKeys))]) + index := rand.Intn(len(paths)) + rpath = paths[index] + rhash = hashes[index] } - if rkey != tr.Hash() { + if rhash != tr.Hash() { break } } if memonly { - robj = triedb.dirties[rkey] - delete(triedb.dirties, rkey) + tr.reader.banned = map[string]struct{}{string(rpath): {}} } else { - rval, _ = diskdb.Get(rkey[:]) - diskdb.Delete(rkey[:]) + rval = rawdb.ReadTrieNode(diskdb, common.Hash{}, rpath, rhash, tdb.Scheme()) + rawdb.DeleteTrieNode(diskdb, common.Hash{}, rpath, rhash, tdb.Scheme()) } // Iterate until the error is hit. seen := make(map[string]bool) - it := tr.NodeIterator(nil) + it := tr.MustNodeIterator(nil) checkIteratorNoDups(t, it, seen) missing, ok := it.Error().(*MissingNodeError) - if !ok || missing.NodeHash != rkey { + if !ok || missing.NodeHash != rhash { t.Fatal("didn't hit missing node, got", it.Error()) } // Add the node back and continue iteration. if memonly { - triedb.dirties[rkey] = robj + delete(tr.reader.banned, string(rpath)) } else { - diskdb.Put(rkey[:], rval) + rawdb.WriteTrieNode(diskdb, common.Hash{}, rpath, rhash, rval, tdb.Scheme()) } checkIteratorNoDups(t, it, seen) if it.Error() != nil { @@ -390,42 +461,49 @@ func testIteratorContinueAfterError(t *testing.T, memonly bool) { // Similar to the test above, this one checks that failure to create nodeIterator at a // certain key prefix behaves correctly when Next is called. The expectation is that Next // should retry seeking before returning true for the first time. -func TestIteratorContinueAfterSeekErrorDisk(t *testing.T) { - testIteratorContinueAfterSeekError(t, false) -} -func TestIteratorContinueAfterSeekErrorMemonly(t *testing.T) { - testIteratorContinueAfterSeekError(t, true) +func TestIteratorContinueAfterSeekError(t *testing.T) { + testIteratorContinueAfterSeekError(t, false, rawdb.HashScheme) + testIteratorContinueAfterSeekError(t, true, rawdb.HashScheme) + testIteratorContinueAfterSeekError(t, false, rawdb.PathScheme) + testIteratorContinueAfterSeekError(t, true, rawdb.PathScheme) } -func testIteratorContinueAfterSeekError(t *testing.T, memonly bool) { +func testIteratorContinueAfterSeekError(t *testing.T, memonly bool, scheme string) { // Commit test trie to db, then remove the node containing "bars". - diskdb := memorydb.New() - triedb := NewDatabase(diskdb) - - ctr, _ := New(common.Hash{}, triedb) + var ( + barNodePath []byte + barNodeHash = common.HexToHash("05041990364eb72fcb1127652ce40d8bab765f2bfe53225b1170d276cc101c2e") + ) + diskdb := rawdb.NewMemoryDatabase() + triedb := newTestDatabase(diskdb, scheme) + ctr := NewEmpty(triedb) for _, val := range testdata1 { ctr.Update([]byte(val.k), []byte(val.v)) } - root, _, _ := ctr.Commit(nil) + root, nodes, _ := ctr.Commit(false) + for path, n := range nodes.Nodes { + if n.Hash == barNodeHash { + barNodePath = []byte(path) + break + } + } + triedb.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) if !memonly { - triedb.Commit(root, true, nil) + triedb.Commit(root, false) } - barNodeHash := common.HexToHash("05041990364eb72fcb1127652ce40d8bab765f2bfe53225b1170d276cc101c2e") var ( barNodeBlob []byte - barNodeObj *cachedNode ) + tr, _ := New(TrieID(root), triedb) if memonly { - barNodeObj = triedb.dirties[barNodeHash] - delete(triedb.dirties, barNodeHash) + tr.reader.banned = map[string]struct{}{string(barNodePath): {}} } else { - barNodeBlob, _ = diskdb.Get(barNodeHash[:]) - diskdb.Delete(barNodeHash[:]) + barNodeBlob = rawdb.ReadTrieNode(diskdb, common.Hash{}, barNodePath, barNodeHash, triedb.Scheme()) + rawdb.DeleteTrieNode(diskdb, common.Hash{}, barNodePath, barNodeHash, triedb.Scheme()) } // Create a new iterator that seeks to "bars". Seeking can't proceed because // the node is missing. - tr, _ := New(root, triedb) - it := tr.NodeIterator([]byte("bars")) + it := tr.MustNodeIterator([]byte("bars")) missing, ok := it.Error().(*MissingNodeError) if !ok { t.Fatal("want MissingNodeError, got", it.Error()) @@ -434,9 +512,9 @@ func testIteratorContinueAfterSeekError(t *testing.T, memonly bool) { } // Reinsert the missing node. if memonly { - triedb.dirties[barNodeHash] = barNodeObj + delete(tr.reader.banned, string(barNodePath)) } else { - diskdb.Put(barNodeHash[:], barNodeBlob) + rawdb.WriteTrieNode(diskdb, common.Hash{}, barNodePath, barNodeHash, barNodeBlob, triedb.Scheme()) } // Check that iteration produces the right set of values. if err := checkIteratorOrder(testdata1[2:], NewIterator(it)); err != nil { @@ -457,6 +535,11 @@ func checkIteratorNoDups(t *testing.T, it NodeIterator, seen map[string]bool) in return len(seen) } +func TestIteratorNodeBlob(t *testing.T) { + testIteratorNodeBlob(t, rawdb.HashScheme) + testIteratorNodeBlob(t, rawdb.PathScheme) +} + type loggingDb struct { getCount uint64 backend ethdb.KeyValueStore @@ -511,8 +594,8 @@ func (l *loggingDb) Close() error { func makeLargeTestTrie() (*Database, *SecureTrie, *loggingDb) { // Create an empty trie logDb := &loggingDb{0, memorydb.New()} - triedb := NewDatabase(logDb) - trie, _ := NewSecure(common.Hash{}, triedb) + triedb := NewDatabase(rawdb.NewDatabase(logDb), nil) + trie, _ := NewSecure(TrieID(types.EmptyRootHash), triedb) // Fill it with some arbitrary data for i := 0; i < 10000; i++ { @@ -524,8 +607,12 @@ func makeLargeTestTrie() (*Database, *SecureTrie, *loggingDb) { val = crypto.Keccak256(val) trie.Update(key, val) } - trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + triedb.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + triedb.Commit(root, false) // Return the generated trie + trie, _ = NewSecure(TrieID(root), triedb) + return triedb, trie, logDb } @@ -537,8 +624,92 @@ func TestNodeIteratorLargeTrie(t *testing.T) { // Do a seek operation trie.NodeIterator(common.FromHex("0x77667766776677766778855885885885")) // master: 24 get operations - // this pr: 5 get operations - if have, want := logDb.getCount, uint64(5); have != want { + // this pr: 6 get operations + if have, want := logDb.getCount, uint64(6); have != want { t.Fatalf("Too many lookups during seek, have %d want %d", have, want) } } + +func testIteratorNodeBlob(t *testing.T, scheme string) { + var ( + db = rawdb.NewMemoryDatabase() + triedb = newTestDatabase(db, scheme) + trie = NewEmpty(triedb) + ) + vals := []struct{ k, v string }{ + {"do", "verb"}, + {"ether", "wookiedoo"}, + {"horse", "stallion"}, + {"shaman", "horse"}, + {"doge", "coin"}, + {"dog", "puppy"}, + {"somethingveryoddindeedthis is", "myothernodedata"}, + } + all := make(map[string]string) + for _, val := range vals { + all[val.k] = val.v + trie.Update([]byte(val.k), []byte(val.v)) + } + root, nodes, _ := trie.Commit(false) + triedb.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + triedb.Commit(root, false) + + var found = make(map[common.Hash][]byte) + trie, _ = New(TrieID(root), triedb) + it := trie.MustNodeIterator(nil) + for it.Next(true) { + if it.Hash() == (common.Hash{}) { + continue + } + found[it.Hash()] = it.NodeBlob() + } + dbIter := db.NewIterator(nil, nil) + defer dbIter.Release() + + var count int + for dbIter.Next() { + ok, _, _ := isTrieNode(triedb.Scheme(), dbIter.Key(), dbIter.Value()) + if !ok { + continue + } + got, present := found[crypto.Keccak256Hash(dbIter.Value())] + if !present { + t.Fatal("Miss trie node") + } + if !bytes.Equal(got, dbIter.Value()) { + t.Fatalf("Unexpected trie node want %v got %v", dbIter.Value(), got) + } + count += 1 + } + if count != len(found) { + t.Fatal("Find extra trie node via iterator") + + } +} + +// isTrieNode is a helper function which reports if the provided +// database entry belongs to a trie node or not. Note in tests + +// only single layer trie is used, namely storage trie is not +// considered at all. +func isTrieNode(scheme string, key, val []byte) (bool, []byte, common.Hash) { + var ( + path []byte + hash common.Hash + ) + if scheme == rawdb.HashScheme { + ok := rawdb.IsLegacyTrieNode(key, val) + if !ok { + return false, nil, common.Hash{} + } + hash = common.BytesToHash(key) + } else { + ok, remain := rawdb.ResolveAccountTrieNodeKey(key) + if !ok { + return false, nil, common.Hash{} + } + path = common.CopyBytes(remain) + hash = crypto.Keccak256Hash(val) + } + return true, path, hash +} diff --git a/trie/node.go b/trie/node.go index f4055e779a..285e2d7702 100644 --- a/trie/node.go +++ b/trie/node.go @@ -28,8 +28,9 @@ import ( var indices = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "[17]"} type node interface { - fstring(string) string cache() (hashNode, bool) + encode(w rlp.EncoderBuffer) + fstring(string) string } type ( @@ -52,16 +53,9 @@ var nilValueNode = valueNode(nil) // EncodeRLP encodes a full node into the consensus RLP format. func (n *fullNode) EncodeRLP(w io.Writer) error { - var nodes [17]node - - for i, child := range &n.Children { - if child != nil { - nodes[i] = child - } else { - nodes[i] = nilValueNode - } - } - return rlp.Encode(w, nodes) + eb := rlp.NewEncoderBuffer(w) + n.encode(eb) + return eb.Flush() } func (n *fullNode) copy() *fullNode { copy := *n; return © } @@ -105,6 +99,18 @@ func (n valueNode) fstring(ind string) string { return fmt.Sprintf("%x ", []byte(n)) } +// rawNode is a simple binary blob used to differentiate between collapsed trie +// nodes and already encoded RLP binary blobs (while at the same time store them +// in the same cache fields). +type rawNode []byte + +func (n rawNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } +func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") } +func (n rawNode) EncodeRLP(w io.Writer) error { + _, err := w.Write(n) + return err +} + func mustDecodeNode(hash, buf []byte) node { n, err := decodeNode(hash, buf) if err != nil { diff --git a/trie/node_enc.go b/trie/node_enc.go new file mode 100644 index 0000000000..1b2eca682f --- /dev/null +++ b/trie/node_enc.go @@ -0,0 +1,64 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "github.com/ethereum/go-ethereum/rlp" +) + +func nodeToBytes(n node) []byte { + w := rlp.NewEncoderBuffer(nil) + n.encode(w) + result := w.ToBytes() + w.Flush() + return result +} + +func (n *fullNode) encode(w rlp.EncoderBuffer) { + offset := w.List() + for _, c := range n.Children { + if c != nil { + c.encode(w) + } else { + w.Write(rlp.EmptyString) + } + } + w.ListEnd(offset) +} + +func (n *shortNode) encode(w rlp.EncoderBuffer) { + offset := w.List() + w.WriteBytes(n.Key) + if n.Val != nil { + n.Val.encode(w) + } else { + w.Write(rlp.EmptyString) + } + w.ListEnd(offset) +} + +func (n hashNode) encode(w rlp.EncoderBuffer) { + w.WriteBytes(n) +} + +func (n valueNode) encode(w rlp.EncoderBuffer) { + w.WriteBytes(n) +} + +func (n rawNode) encode(w rlp.EncoderBuffer) { + w.Write(n) +} diff --git a/trie/preimages.go b/trie/preimages.go new file mode 100644 index 0000000000..f5b1291a23 --- /dev/null +++ b/trie/preimages.go @@ -0,0 +1,91 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" +) + +// preimageStore is the store for caching preimages of node key. +type preimageStore struct { + lock sync.RWMutex + disk ethdb.KeyValueStore + preimages map[common.Hash][]byte // Preimages of nodes from the secure trie + preimagesSize common.StorageSize // Storage size of the preimages cache +} + +// newPreimageStore initializes the store for caching preimages. +func newPreimageStore(disk ethdb.KeyValueStore) *preimageStore { + return &preimageStore{ + disk: disk, + preimages: make(map[common.Hash][]byte), + } +} + +func (store *preimageStore) insertPreimage(preimages map[common.Hash][]byte) { + store.lock.Lock() + defer store.lock.Unlock() + + for hash, preimage := range preimages { + if _, ok := store.preimages[hash]; ok { + continue + } + store.preimages[hash] = preimage + store.preimagesSize += common.StorageSize(common.HashLength + len(preimage)) + } +} + +func (store *preimageStore) preimage(hash common.Hash) []byte { + // Lock the store for reading + store.lock.RLock() + preimage := store.preimages[hash] + store.lock.RUnlock() + if preimage != nil { + return preimage + } + // Incase preimage is not existed in memory, then read from disk. + return rawdb.ReadPreimage(store.disk, hash) +} + +func (store *preimageStore) commit(force bool) error { + store.lock.Lock() + defer store.lock.Unlock() + + // If preimages size is less than 4MB and not forced to commit, then return. + if store.preimagesSize <= 4*1024*1024 && !force { + return nil + } + + batch := store.disk.NewBatch() + rawdb.WritePreimages(batch, store.preimages) + if err := batch.Write(); err != nil { + return err + } + store.preimages, store.preimagesSize = make(map[common.Hash][]byte), 0 + return nil +} + +func (store *preimageStore) size() common.StorageSize { + store.lock.RLock() + defer store.lock.RUnlock() + + return store.preimagesSize +} diff --git a/trie/proof.go b/trie/proof.go index 51ecea0c39..437cf3d00b 100644 --- a/trie/proof.go +++ b/trie/proof.go @@ -23,9 +23,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/ethdb" - "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/rlp" ) // Prove constructs a merkle proof for key. The result contains all encoded nodes @@ -36,10 +34,16 @@ import ( // nodes of the longest existing prefix of the key (at least the root node), ending // with the node that proves the absence of the key. func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) error { + if t.committed { + return ErrCommitted + } // Collect all nodes on the path to key. + var ( + prefix []byte + nodes []node + tn = t.root + ) key = keybytesToHex(key) - var nodes []node - tn := t.root for len(key) > 0 && tn != nil { switch n := tn.(type) { case *shortNode: @@ -48,20 +52,30 @@ func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) e tn = nil } else { tn = n.Val + prefix = append(prefix, n.Key...) key = key[len(n.Key):] } nodes = append(nodes, n) case *fullNode: tn = n.Children[key[0]] + prefix = append(prefix, key[0]) key = key[1:] nodes = append(nodes, n) case hashNode: - var err error - tn, err = t.resolveHash(n, nil) + // Retrieve the specified node from the underlying node reader. + // trie.resolveAndTrack is not used since in that function the + // loaded blob will be tracked, while it's not required here since + // all loaded nodes won't be linked to trie at all and track nodes + // may lead to out-of-memory issue + blob, err := t.reader.node(prefix, common.BytesToHash(n)) if err != nil { log.Error(fmt.Sprintf("Unhandled trie error: %v", err)) return err } + // The raw-blob format nodes are loaded either from the + // clean cache or the database, they are all in their own + // copy and safe to use unsafe decoder. + tn = mustDecodeNode(n, blob) default: panic(fmt.Sprintf("%T: invalid node: %v", tn, tn)) } @@ -79,7 +93,7 @@ func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) e if hash, ok := hn.(hashNode); ok || i == 0 { // If the node's database encoding is a hash (or is the // root node), it becomes a proof element. - enc, _ := rlp.EncodeToBytes(n) + enc := nodeToBytes(n) if !ok { hash = hasher.hashData(enc) } @@ -335,9 +349,9 @@ findFork: // unset removes all internal node references either the left most or right most. // It can meet these scenarios: // -// - The given path is existent in the trie, unset the associated nodes with the -// specific direction -// - The given path is non-existent in the trie +// - The given path is existent in the trie, unset the associated nodes with the +// specific direction +// - The given path is non-existent in the trie // - the fork point is a fullnode, the corresponding child pointed by path // is nil, return // - the fork point is a shortnode, the shortnode is included in the range, @@ -452,15 +466,15 @@ func hasRightElement(node node, key []byte) bool { // Expect the normal case, this function can also be used to verify the following // range proofs: // -// - All elements proof. In this case the proof can be nil, but the range should -// be all the leaves in the trie. +// - All elements proof. In this case the proof can be nil, but the range should +// be all the leaves in the trie. // -// - One element proof. In this case no matter the edge proof is a non-existent -// proof or not, we can always verify the correctness of the proof. +// - One element proof. In this case no matter the edge proof is a non-existent +// proof or not, we can always verify the correctness of the proof. // -// - Zero element proof. In this case a single non-existent proof is enough to prove. -// Besides, if there are still some other leaves available on the right side, then -// an error will be returned. +// - Zero element proof. In this case a single non-existent proof is enough to prove. +// Besides, if there are still some other leaves available on the right side, then +// an error will be returned. // // Except returning the error to indicate the proof is valid or not, the function will // also return a flag to indicate whether there exists more accounts/slots in the trie. @@ -488,7 +502,7 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, lastKey []byte, key if proof == nil { tr := NewStackTrie(nil) for index, key := range keys { - tr.TryUpdate(key, values[index]) + tr.Update(key, values[index]) } if have, want := tr.Hash(), rootHash; have != want { return false, fmt.Errorf("invalid proof, want hash %x, got %x", want, have) @@ -553,7 +567,7 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, lastKey []byte, key } // Rebuild the trie with the leaf stream, the shape of trie // should be same with the original one. - tr := &Trie{root: root, db: NewDatabase(memorydb.New())} + tr := &Trie{root: root, reader: newEmptyReader(), tracer: newTracer()} if empty { tr.root = nil } diff --git a/trie/proof_test.go b/trie/proof_test.go index 95ad6169c3..9f31b88d17 100644 --- a/trie/proof_test.go +++ b/trie/proof_test.go @@ -26,6 +26,7 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb/memorydb" ) @@ -48,7 +49,7 @@ func makeProvers(trie *Trie) []func(key []byte) *memorydb.Database { // Create a leaf iterator based Merkle prover provers = append(provers, func(key []byte) *memorydb.Database { proof := memorydb.New() - if it := NewIterator(trie.NodeIterator(key)); it.Next() && bytes.Equal(key, it.Key) { + if it := NewIterator(trie.MustNodeIterator(key)); it.Next() && bytes.Equal(key, it.Key) { for _, p := range it.Prove() { proof.Put(crypto.Keccak256(p), p) } @@ -79,7 +80,7 @@ func TestProof(t *testing.T) { } func TestOneElementProof(t *testing.T) { - trie := new(Trie) + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) updateString(trie, "k", "v") for i, prover := range makeProvers(trie) { proof := prover([]byte("k")) @@ -130,7 +131,7 @@ func TestBadProof(t *testing.T) { // Tests that missing keys can also be proven. The test explicitly uses a single // entry trie and checks for missing keys both before and after the single entry. func TestMissingKeyProof(t *testing.T) { - trie := new(Trie) + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) updateString(trie, "k", "v") for i, key := range []string{"a", "j", "l", "z"} { @@ -386,7 +387,7 @@ func TestOneElementRangeProof(t *testing.T) { } // Test the mini trie with only a single element. - tinyTrie := new(Trie) + tinyTrie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) entry := &kv{randBytes(32), randBytes(20), false} tinyTrie.Update(entry.k, entry.v) @@ -458,7 +459,7 @@ func TestAllElementsProof(t *testing.T) { // TestSingleSideRangeProof tests the range starts from zero. func TestSingleSideRangeProof(t *testing.T) { for i := 0; i < 64; i++ { - trie := new(Trie) + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) var entries entrySlice for i := 0; i < 4096; i++ { value := &kv{randBytes(32), randBytes(20), false} @@ -493,7 +494,7 @@ func TestSingleSideRangeProof(t *testing.T) { // TestReverseSingleSideRangeProof tests the range ends with 0xffff...fff. func TestReverseSingleSideRangeProof(t *testing.T) { for i := 0; i < 64; i++ { - trie := new(Trie) + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) var entries entrySlice for i := 0; i < 4096; i++ { value := &kv{randBytes(32), randBytes(20), false} @@ -600,7 +601,7 @@ func TestBadRangeProof(t *testing.T) { // TestGappedRangeProof focuses on the small trie with embedded nodes. // If the gapped node is embedded in the trie, it should be detected too. func TestGappedRangeProof(t *testing.T) { - trie := new(Trie) + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) var entries []*kv // Sorted entries for i := byte(0); i < 10; i++ { value := &kv{common.LeftPadBytes([]byte{i}, 32), []byte{i}, false} @@ -674,7 +675,7 @@ func TestSameSideProofs(t *testing.T) { } func TestHasRightElement(t *testing.T) { - trie := new(Trie) + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) var entries entrySlice for i := 0; i < 4096; i++ { value := &kv{randBytes(32), randBytes(20), false} @@ -1027,7 +1028,7 @@ func benchmarkVerifyRangeNoProof(b *testing.B, size int) { } func randomTrie(n int) (*Trie, map[string]*kv) { - trie := new(Trie) + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) vals := make(map[string]*kv) for i := byte(0); i < 100; i++ { value := &kv{common.LeftPadBytes([]byte{i}, 32), []byte{i}, false} @@ -1052,7 +1053,7 @@ func randBytes(n int) []byte { } func nonRandomTrie(n int) (*Trie, map[string]*kv) { - trie := new(Trie) + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) vals := make(map[string]*kv) max := uint64(0xffffffffffffffff) for i := uint64(0); i < uint64(n); i++ { diff --git a/trie/secure_trie.go b/trie/secure_trie.go index 18be12d34a..8ff4630a7f 100644 --- a/trie/secure_trie.go +++ b/trie/secure_trie.go @@ -23,6 +23,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" ) // SecureTrie wraps a trie with key hashing. In a secure trie, all @@ -37,6 +38,7 @@ import ( // SecureTrie is not safe for concurrent use. type SecureTrie struct { trie Trie + preimages *preimageStore hashKeyBuf [common.HashLength]byte secKeyCache map[string][]byte secKeyCacheOwner *SecureTrie // Pointer to self, replace the key cache on mismatch @@ -53,15 +55,15 @@ type SecureTrie struct { // Loaded nodes are kept around until their 'cache generation' expires. // A new cache generation is created by each call to Commit. // cachelimit sets the number of past cache generations to keep. -func NewSecure(root common.Hash, db *Database) (*SecureTrie, error) { +func NewSecure(id *ID, db *Database) (*SecureTrie, error) { if db == nil { panic("trie.NewSecure called without a database") } - trie, err := New(root, db) + trie, err := New(id, db) if err != nil { return nil, err } - return &SecureTrie{trie: *trie}, nil + return &SecureTrie{trie: *trie, preimages: db.preimages}, nil } // Get returns the value for key stored in the trie. @@ -153,28 +155,33 @@ func (t *SecureTrie) GetKey(shaKey []byte) []byte { if key, ok := t.getSecKeyCache()[string(shaKey)]; ok { return key } - return t.trie.db.preimage(common.BytesToHash(shaKey)) + if t.preimages == nil { + return nil + } + + return t.preimages.preimage(common.BytesToHash(shaKey)) } -// Commit writes all nodes and the secure hash pre-images to the trie's database. -// Nodes are stored with their sha3 hash as the key. -// -// Committing flushes nodes from memory. Subsequent Get calls will load nodes -// from the database. -func (t *SecureTrie) Commit(onleaf LeafCallback) (common.Hash, int, error) { +// Commit collects all dirty nodes in the trie and replace them with the +// corresponding node hash. All collected nodes(including dirty leaves if +// collectLeaf is true) will be encapsulated into a nodeset for return. +// The returned nodeset can be nil if the trie is clean(nothing to commit). +// All cached preimages will be also flushed if preimages recording is enabled. +func (t *SecureTrie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet, error) { // Write all the pre-images to the actual disk database if len(t.getSecKeyCache()) > 0 { - if t.trie.db.preimages != nil { // Ugly direct check but avoids the below write lock - t.trie.db.lock.Lock() + if t.preimages != nil { // Ugly direct check but avoids the below write lock + preimages := make(map[common.Hash][]byte) + for hk, key := range t.secKeyCache { - t.trie.db.insertPreimage(common.BytesToHash([]byte(hk)), key) + preimages[common.BytesToHash([]byte(hk))] = key } - t.trie.db.lock.Unlock() + t.preimages.insertPreimage(preimages) } t.secKeyCache = make(map[string][]byte) } // Commit the trie to its intermediate node database - return t.trie.Commit(onleaf) + return t.trie.Commit(collectLeaf) } // Hash returns the root hash of SecureTrie. It does not write to the @@ -185,16 +192,25 @@ func (t *SecureTrie) Hash() common.Hash { // Copy returns a copy of SecureTrie. func (t *SecureTrie) Copy() *SecureTrie { - cpy := *t - return &cpy + return &SecureTrie{ + trie: *t.trie.Copy(), + preimages: t.preimages, + secKeyCache: t.secKeyCache, + } } // NodeIterator returns an iterator that returns nodes of the underlying trie. Iteration // starts at the key after the given start key. -func (t *SecureTrie) NodeIterator(start []byte) NodeIterator { +func (t *SecureTrie) NodeIterator(start []byte) (NodeIterator, error) { return t.trie.NodeIterator(start) } +// MustNodeIterator is a wrapper of NodeIterator and will omit any encountered +// error but just print out an error message. +func (t *SecureTrie) MustNodeIterator(start []byte) NodeIterator { + return t.trie.MustNodeIterator(start) +} + // hashKey returns the hash of key as an ephemeral buffer. // The caller must not hold onto the return value because it will become // invalid on the next call to hashKey or secKey. diff --git a/trie/secure_trie_test.go b/trie/secure_trie_test.go index fb6c38ee22..797d23d4b0 100644 --- a/trie/secure_trie_test.go +++ b/trie/secure_trie_test.go @@ -23,20 +23,20 @@ import ( "testing" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/ethdb/memorydb" ) func newEmptySecure() *SecureTrie { - trie, _ := NewSecure(common.Hash{}, NewDatabase(memorydb.New())) + trie, _ := NewSecure(TrieID(common.Hash{}), NewDatabase(rawdb.NewMemoryDatabase(), nil)) return trie } // makeTestSecureTrie creates a large enough secure trie for testing. func makeTestSecureTrie() (*Database, *SecureTrie, map[string][]byte) { // Create an empty trie - triedb := NewDatabase(memorydb.New()) - trie, _ := NewSecure(common.Hash{}, triedb) + triedb := NewDatabase(rawdb.NewMemoryDatabase(), nil) + trie, _ := NewSecure(TrieID(common.Hash{}), triedb) // Fill it with some arbitrary data content := make(map[string][]byte) @@ -57,7 +57,7 @@ func makeTestSecureTrie() (*Database, *SecureTrie, map[string][]byte) { trie.Update(key, val) } } - trie.Commit(nil) + trie.Commit(false) // Return the generated trie return triedb, trie, content @@ -112,8 +112,7 @@ func TestSecureTrieConcurrency(t *testing.T) { threads := runtime.NumCPU() tries := make([]*SecureTrie, threads) for i := 0; i < threads; i++ { - cpy := *trie - tries[i] = &cpy + tries[i] = trie.Copy() } // Start a batch of goroutines interactng with the trie pend := new(sync.WaitGroup) @@ -136,7 +135,7 @@ func TestSecureTrieConcurrency(t *testing.T) { tries[index].Update(key, val) } } - tries[index].Commit(nil) + tries[index].Commit(false) }(i) } // Wait for all threads to finish diff --git a/trie/stacktrie.go b/trie/stacktrie.go index f9ff10b62d..8a18a6f86f 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -17,168 +17,145 @@ package trie import ( - "bufio" "bytes" - "encoding/gob" - "errors" "fmt" - "io" "sync" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/metrics" ) -var ErrCommitDisabled = errors.New("no database for committing") +var ( + stPool = sync.Pool{New: func() any { return new(stNode) }} + _ = types.TrieHasher((*StackTrie)(nil)) // Ensure StackTrie implements the TrieHasher interface +) + +// StackTrieOptions contains the configured options for manipulating the stackTrie. +type StackTrieOptions struct { + Writer func(path []byte, hash common.Hash, blob []byte) // The function to commit the dirty nodes + Cleaner func(path []byte) // The function to clean up dangling nodes -var stPool = sync.Pool{ - New: func() interface{} { - return NewStackTrie(nil) - }, + SkipLeftBoundary bool // Flag whether the nodes on the left boundary are skipped for committing + SkipRightBoundary bool // Flag whether the nodes on the right boundary are skipped for committing + boundaryGauge metrics.Gauge // Gauge to track how many boundary nodes are met } -func stackTrieFromPool(db ethdb.KeyValueWriter) *StackTrie { - st := stPool.Get().(*StackTrie) - st.db = db - return st +// NewStackTrieOptions initializes an empty options for stackTrie. +func NewStackTrieOptions() *StackTrieOptions { return &StackTrieOptions{} } + +// WithWriter configures trie node writer within the options. +func (o *StackTrieOptions) WithWriter(writer func(path []byte, hash common.Hash, blob []byte)) *StackTrieOptions { + o.Writer = writer + return o +} + +// WithCleaner configures the cleaner in the option for removing dangling nodes. +func (o *StackTrieOptions) WithCleaner(cleaner func(path []byte)) *StackTrieOptions { + o.Cleaner = cleaner + return o } -func returnToPool(st *StackTrie) { - st.Reset() - stPool.Put(st) +// WithSkipBoundary configures whether the left and right boundary nodes are +// filtered for committing, along with a gauge metrics to track how many +// boundary nodes are met. +func (o *StackTrieOptions) WithSkipBoundary(skipLeft, skipRight bool, gauge metrics.Gauge) *StackTrieOptions { + o.SkipLeftBoundary = skipLeft + o.SkipRightBoundary = skipRight + o.boundaryGauge = gauge + return o } // StackTrie is a trie implementation that expects keys to be inserted // in order. Once it determines that a subtree will no longer be inserted // into, it will hash it and free up the memory it uses. type StackTrie struct { - nodeType uint8 // node type (as in branch, ext, leaf) - val []byte // value contained by this node if it's a leaf - key []byte // key chunk covered by this (full|ext) node - keyOffset int // offset of the key chunk inside a full key - children [16]*StackTrie // list of children (for fullnodes and exts) - db ethdb.KeyValueWriter // Pointer to the commit db, can be nil + options *StackTrieOptions + root *stNode + h *hasher + + first []byte // The (hex-encoded without terminator) key of first inserted entry, tracked as left boundary. + last []byte // The (hex-encoded without terminator) key of last inserted entry, tracked as right boundary. } // NewStackTrie allocates and initializes an empty trie. -func NewStackTrie(db ethdb.KeyValueWriter) *StackTrie { +func NewStackTrie(options *StackTrieOptions) *StackTrie { + if options == nil { + options = NewStackTrieOptions() + } return &StackTrie{ - nodeType: emptyNode, - db: db, + options: options, + root: stPool.Get().(*stNode), + h: newHasher(false), } } -// NewFromBinary initialises a serialized stacktrie with the given db. -func NewFromBinary(data []byte, db ethdb.KeyValueWriter) (*StackTrie, error) { - var st StackTrie - if err := st.UnmarshalBinary(data); err != nil { - return nil, err - } - // If a database is used, we need to recursively add it to every child - if db != nil { - st.setDb(db) +func (t *StackTrie) Update(key, value []byte) { + if err := t.TryUpdate(key, value); err != nil { + log.Error(fmt.Sprintf("Unhandled trie error: %v", err)) } - return &st, nil } -// MarshalBinary implements encoding.BinaryMarshaler -func (st *StackTrie) MarshalBinary() (data []byte, err error) { - var ( - b bytes.Buffer - w = bufio.NewWriter(&b) - ) - if err := gob.NewEncoder(w).Encode(struct { - Nodetype uint8 - Val []byte - Key []byte - KeyOffset uint8 - }{ - st.nodeType, - st.val, - st.key, - uint8(st.keyOffset), - }); err != nil { - return nil, err - } - for _, child := range st.children { - if child == nil { - w.WriteByte(0) - continue - } - w.WriteByte(1) - if childData, err := child.MarshalBinary(); err != nil { - return nil, err - } else { - w.Write(childData) - } +// Update inserts a (key, value) pair into the stack trie. +func (t *StackTrie) TryUpdate(key, value []byte) error { + k := keybytesToHex(key) + if len(value) == 0 { + panic("deletion not supported") } - w.Flush() - return b.Bytes(), nil -} - -// UnmarshalBinary implements encoding.BinaryUnmarshaler -func (st *StackTrie) UnmarshalBinary(data []byte) error { - r := bytes.NewReader(data) - return st.unmarshalBinary(r) -} + k = k[:len(k)-1] // chop the termination flag -func (st *StackTrie) unmarshalBinary(r io.Reader) error { - var dec struct { - Nodetype uint8 - Val []byte - Key []byte - KeyOffset uint8 + // track the first and last inserted entries. + if t.first == nil { + t.first = append([]byte{}, k...) } - gob.NewDecoder(r).Decode(&dec) - st.nodeType = dec.Nodetype - st.val = dec.Val - st.key = dec.Key - st.keyOffset = int(dec.KeyOffset) - - var hasChild = make([]byte, 1) - for i := range st.children { - if _, err := r.Read(hasChild); err != nil { - return err - } else if hasChild[0] == 0 { - continue - } - var child StackTrie - child.unmarshalBinary(r) - st.children[i] = &child + if t.last == nil { + t.last = append([]byte{}, k...) // allocate key slice + } else { + t.last = append(t.last[:0], k...) // reuse key slice } + t.insert(t.root, k, value, nil) return nil } -func (st *StackTrie) setDb(db ethdb.KeyValueWriter) { - st.db = db - for _, child := range st.children { - if child != nil { - child.setDb(db) - } - } +// Reset resets the stack trie object to empty state. +func (t *StackTrie) Reset() { + t.options = NewStackTrieOptions() + t.root = stPool.Get().(*stNode) + t.first = nil + t.last = nil +} + +// stNode represents a node within a StackTrie +type stNode struct { + typ uint8 // node type (as in branch, ext, leaf) + key []byte // key chunk covered by this (leaf|ext) node + val []byte // value contained by this node if it's a leaf + children [16]*stNode // list of children (for branch and exts) } -func newLeaf(ko int, key, val []byte, db ethdb.KeyValueWriter) *StackTrie { - st := stackTrieFromPool(db) - st.nodeType = leafNode - st.keyOffset = ko - st.key = append(st.key, key[ko:]...) +// newLeaf constructs a leaf node with provided node key and value. The key +// will be deep-copied in the function and safe to modify afterwards, but +// value is not. +func newLeaf(key, val []byte) *stNode { + st := stPool.Get().(*stNode) + st.typ = leafNode + st.key = append(st.key, key...) st.val = val return st } -func newExt(ko int, key []byte, child *StackTrie, db ethdb.KeyValueWriter) *StackTrie { - st := stackTrieFromPool(db) - st.nodeType = extNode - st.keyOffset = ko - st.key = append(st.key, key[ko:]...) +// newExt constructs an extension node with provided node key and child. The +// key will be deep-copied in the function and safe to modify afterwards. +func newExt(key []byte, child *stNode) *stNode { + st := stPool.Get().(*stNode) + st.typ = extNode + st.key = append(st.key, key...) st.children[0] = child return st } -// List all values that StackTrie#nodeType can hold +// List all values that stNode#nodeType can hold const ( emptyNode = iota branchNode @@ -187,64 +164,50 @@ const ( hashedNode ) -// TryUpdate inserts a (key, value) pair into the stack trie -func (st *StackTrie) TryUpdate(key, value []byte) error { - k := keybytesToHex(key) - if len(value) == 0 { - panic("deletion not supported") - } - st.insert(k[:len(k)-1], value) - return nil -} - -func (st *StackTrie) Update(key, value []byte) { - if err := st.TryUpdate(key, value); err != nil { - log.Error(fmt.Sprintf("Unhandled trie error: %v", err)) +func (n *stNode) reset() *stNode { + n.key = n.key[:0] + n.val = nil + for i := range n.children { + n.children[i] = nil } -} - -func (st *StackTrie) Reset() { - st.db = nil - st.key = st.key[:0] - st.val = nil - for i := range st.children { - st.children[i] = nil - } - st.nodeType = emptyNode - st.keyOffset = 0 + n.typ = emptyNode + return n } // Helper function that, given a full key, determines the index // at which the chunk pointed by st.keyOffset is different from // the same chunk in the full key. -func (st *StackTrie) getDiffIndex(key []byte) int { - diffindex := 0 - for ; diffindex < len(st.key) && st.key[diffindex] == key[st.keyOffset+diffindex]; diffindex++ { +func (n *stNode) getDiffIndex(key []byte) int { + for idx, nibble := range n.key { + if nibble != key[idx] { + return idx + } } - return diffindex + return len(n.key) } // Helper function to that inserts a (key, value) pair into // the trie. -func (st *StackTrie) insert(key, value []byte) { - switch st.nodeType { +func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) { + switch st.typ { case branchNode: /* Branch */ - idx := int(key[st.keyOffset]) + idx := int(key[0]) // Unresolve elder siblings for i := idx - 1; i >= 0; i-- { if st.children[i] != nil { - if st.children[i].nodeType != hashedNode { - st.children[i].hash() + if st.children[i].typ != hashedNode { + t.hash(st.children[i], append(path, byte(i))) } break } } // Add new child if st.children[idx] == nil { - st.children[idx] = stackTrieFromPool(st.db) - st.children[idx].keyOffset = st.keyOffset + 1 + st.children[idx] = newLeaf(key[1:], value) + } else { + t.insert(st.children[idx], key[1:], value, append(path, key[0])) } - st.children[idx].insert(key, value) + case extNode: /* Ext */ // Compare both key chunks and see where they differ diffidx := st.getDiffIndex(key) @@ -257,46 +220,49 @@ func (st *StackTrie) insert(key, value []byte) { if diffidx == len(st.key) { // Ext key and key segment are identical, recurse into // the child node. - st.children[0].insert(key, value) + t.insert(st.children[0], key[diffidx:], value, append(path, key[:diffidx]...)) return } // Save the original part. Depending if the break is // at the extension's last byte or not, create an // intermediate extension or use the extension's child // node directly. - var n *StackTrie + var n *stNode if diffidx < len(st.key)-1 { - n = newExt(diffidx+1, st.key, st.children[0], st.db) + // Break on the non-last byte, insert an intermediate + // extension. The path prefix of the newly-inserted + // extension should also contain the different byte. + n = newExt(st.key[diffidx+1:], st.children[0]) + t.hash(n, append(path, st.key[:diffidx+1]...)) } else { - // Break on the last byte, no need to insert - // an extension node: reuse the current node + // an extension node: reuse the current node. + // The path prefix of the original part should + // still be same. n = st.children[0] + t.hash(n, append(path, st.key...)) } - // Convert to hash - n.hash() - var p *StackTrie + var p *stNode if diffidx == 0 { // the break is on the first byte, so // the current node is converted into // a branch node. st.children[0] = nil p = st - st.nodeType = branchNode + st.typ = branchNode } else { // the common prefix is at least one byte // long, insert a new intermediate branch // node. - st.children[0] = stackTrieFromPool(st.db) - st.children[0].nodeType = branchNode - st.children[0].keyOffset = st.keyOffset + diffidx + st.children[0] = stPool.Get().(*stNode) + st.children[0].typ = branchNode p = st.children[0] } // Create a leaf for the inserted part - o := newLeaf(st.keyOffset+diffidx+1, key, value, st.db) + o := newLeaf(key[diffidx+1:], value) // Insert both child leaves where they belong: origIdx := st.key[diffidx] - newIdx := key[diffidx+st.keyOffset] + newIdx := key[diffidx] p.children[origIdx] = n p.children[newIdx] = o st.key = st.key[:diffidx] @@ -318,19 +284,18 @@ func (st *StackTrie) insert(key, value []byte) { // Check if the split occurs at the first nibble of the // chunk. In that case, no prefix extnode is necessary. // Otherwise, create that - var p *StackTrie + var p *stNode if diffidx == 0 { // Convert current leaf into a branch - st.nodeType = branchNode + st.typ = branchNode p = st st.children[0] = nil } else { // Convert current node into an ext, // and insert a child branch node. - st.nodeType = extNode - st.children[0] = NewStackTrie(st.db) - st.children[0].nodeType = branchNode - st.children[0].keyOffset = st.keyOffset + diffidx + st.typ = extNode + st.children[0] = stPool.Get().(*stNode) + st.children[0].typ = branchNode p = st.children[0] } @@ -339,19 +304,19 @@ func (st *StackTrie) insert(key, value []byte) { // The child leave will be hashed directly in order to // free up some memory. origIdx := st.key[diffidx] - p.children[origIdx] = newLeaf(diffidx+1, st.key, st.val, st.db) - p.children[origIdx].hash() + p.children[origIdx] = newLeaf(st.key[diffidx+1:], st.val) + t.hash(p.children[origIdx], append(path, st.key[:diffidx+1]...)) - newIdx := key[diffidx+st.keyOffset] - p.children[newIdx] = newLeaf(p.keyOffset+1, key, value, st.db) + newIdx := key[diffidx] + p.children[newIdx] = newLeaf(key[diffidx+1:], value) // Finally, cut off the key part that has been passed // over to the children. st.key = st.key[:diffidx] st.val = nil case emptyNode: /* Empty */ - st.nodeType = leafNode - st.key = key[st.keyOffset:] + st.typ = leafNode + st.key = key st.val = value case hashedNode: panic("trying to insert into hash") @@ -363,151 +328,150 @@ func (st *StackTrie) insert(key, value []byte) { // hash() hashes the node 'st' and converts it into 'hashedNode', if possible. // Possible outcomes: // 1. The rlp-encoded value was >= 32 bytes: -// - Then the 32-byte `hash` will be accessible in `st.val`. -// - And the 'st.type' will be 'hashedNode' +// - Then the 32-byte `hash` will be accessible in `st.val`. +// - And the 'st.type' will be 'hashedNode' +// // 2. The rlp-encoded value was < 32 bytes -// - Then the <32 byte rlp-encoded value will be accessible in 'st.val'. -// - And the 'st.type' will be 'hashedNode' AGAIN +// - Then the <32 byte rlp-encoded value will be accessible in 'st.val'. +// - And the 'st.type' will be 'hashedNode' AGAIN // // This method will also: // set 'st.type' to hashedNode // clear 'st.key' -func (st *StackTrie) hash() { +func (t *StackTrie) hash(st *stNode, path []byte) { /* Shortcut if node is already hashed */ - if st.nodeType == hashedNode { + if st.typ == hashedNode { return } // The 'hasher' is taken from a pool, but we don't actually // claim an instance until all children are done with their hashing, // and we actually need one - var h *hasher - switch st.nodeType { + var ( + blob []byte // RLP-encoded node blob + internal [][]byte // List of node paths covered by the extension node + ) + + switch st.typ { case branchNode: - var nodes [17]node + var node fullNode for i, child := range st.children { if child == nil { - nodes[i] = nilValueNode + node.Children[i] = nilValueNode continue } - child.hash() + t.hash(child, append(path, byte(i))) if len(child.val) < 32 { - nodes[i] = rawNode(child.val) + node.Children[i] = rawNode(child.val) } else { - nodes[i] = hashNode(child.val) + node.Children[i] = hashNode(child.val) } - st.children[i] = nil // Reclaim mem from subtree - returnToPool(child) - } - nodes[16] = nilValueNode - h = newHasher(false) - defer returnHasherToPool(h) - h.tmp.Reset() - if err := rlp.Encode(&h.tmp, nodes); err != nil { - panic(err) + st.children[i] = nil // Reclaim mem from subtree + stPool.Put(child.reset()) // Release child back to pool. } + + node.encode(t.h.encbuf) + blob = t.h.encodedBytes() case extNode: - st.children[0].hash() - h = newHasher(false) - defer returnHasherToPool(h) - h.tmp.Reset() - var valuenode node + // recursively hash and commit child as the first step + t.hash(st.children[0], append(path, st.key...)) + + // Collect the path of internal nodes between shortNode and its **in disk** + // child. This is essential in the case of path mode scheme to avoid leaving + // danging nodes within the range of this internal path on disk, which would + // break the guarantee for state healing. + if len(st.children[0].val) >= 32 && t.options.Cleaner != nil { + for i := 1; i < len(st.key); i++ { + internal = append(internal, append(path, st.key[:i]...)) + } + } + // encode the extension node + sz := hexToCompactInPlace(st.key) + n := shortNode{Key: st.key[:sz]} + if len(st.children[0].val) < 32 { - valuenode = rawNode(st.children[0].val) + n.Val = rawNode(st.children[0].val) } else { - valuenode = hashNode(st.children[0].val) - } - n := struct { - Key []byte - Val node - }{ - Key: hexToCompact(st.key), - Val: valuenode, + n.Val = hashNode(st.children[0].val) } - if err := rlp.Encode(&h.tmp, n); err != nil { - panic(err) - } - returnToPool(st.children[0]) - st.children[0] = nil // Reclaim mem from subtree + + n.encode(t.h.encbuf) + blob = t.h.encodedBytes() + + stPool.Put(st.children[0].reset()) // Release child back to pool. + st.children[0] = nil // Reclaim mem from subtree case leafNode: - h = newHasher(false) - defer returnHasherToPool(h) - h.tmp.Reset() + st.key = append(st.key, byte(16)) sz := hexToCompactInPlace(st.key) - n := [][]byte{st.key[:sz], st.val} - if err := rlp.Encode(&h.tmp, n); err != nil { - panic(err) - } + n := shortNode{Key: st.key[:sz], Val: valueNode(st.val)} + + n.encode(t.h.encbuf) + blob = t.h.encodedBytes() case emptyNode: st.val = emptyRoot.Bytes() st.key = st.key[:0] - st.nodeType = hashedNode + st.typ = hashedNode return default: panic("Invalid node type") } st.key = st.key[:0] - st.nodeType = hashedNode - if len(h.tmp) < 32 { - st.val = common.CopyBytes(h.tmp) + st.typ = hashedNode + // Skip committing the non-root node if the size is smaller than 32 bytes. + if len(blob) < 32 && len(path) > 0 { + // If rlp-encoded value was < 32 bytes, then val point directly to the rlp-encoded value + st.val = common.CopyBytes(blob) + return + } + + st.val = t.h.hashData(blob) + + // Short circuit if the stack trie is not configured for writing. + if t.options.Writer == nil { + return + } + // Skip committing if the node is on the left boundary and stackTrie is + // configured to filter the boundary. + if t.options.SkipLeftBoundary && bytes.HasPrefix(t.first, path) { + if t.options.boundaryGauge != nil { + t.options.boundaryGauge.Inc(1) + } + return + } + // Skip committing if the node is on the right boundary and stackTrie is + // configured to filter the boundary. + if t.options.SkipRightBoundary && bytes.HasPrefix(t.last, path) { + if t.options.boundaryGauge != nil { + t.options.boundaryGauge.Inc(1) + } return } - // Write the hash to the 'val'. We allocate a new val here to not mutate - // input values - st.val = make([]byte, 32) - h.sha.Reset() - h.sha.Write(h.tmp) - h.sha.Read(st.val) - if st.db != nil { - // TODO! Is it safe to Put the slice here? - // Do all db implementations copy the value provided? - st.db.Put(st.val, h.tmp) + // Clean up the internal dangling nodes covered by the extension node. + // This should be done before writing the node to adhere to the committing + // order from bottom to top. + for _, path := range internal { + t.options.Cleaner(path) } + t.options.Writer(path, common.BytesToHash(st.val), blob) } -// Hash returns the hash of the current node -func (st *StackTrie) Hash() (h common.Hash) { - st.hash() - if len(st.val) != 32 { - // If the node's RLP isn't 32 bytes long, the node will not - // be hashed, and instead contain the rlp-encoding of the - // node. For the top level node, we need to force the hashing. - ret := make([]byte, 32) - h := newHasher(false) - defer returnHasherToPool(h) - h.sha.Reset() - h.sha.Write(st.val) - h.sha.Read(ret) - return common.BytesToHash(ret) - } - return common.BytesToHash(st.val) +// Hash will firstly hash the entire trie if it's still not hashed and then commit +// all nodes to the associated database. Actually most of the trie nodes have been +// committed already. The main purpose here is to commit the nodes on right boundary. +// +// For stack trie, Hash and Commit are functionally identical +func (t *StackTrie) Hash() (h common.Hash) { + n := t.root + t.hash(n, nil) + return common.BytesToHash(n.val) } -// Commit will firstly hash the entrie trie if it's still not hashed -// and then commit all nodes to the associated database. Actually most -// of the trie nodes MAY have been committed already. The main purpose -// here is to commit the root node. +// Commit will firstly hash the entire trie if it's still not hashed and then commit +// all nodes to the associated database. Actually most of the trie nodes have been +// committed already. The main purpose here is to commit the nodes on right boundary. // -// The associated database is expected, otherwise the whole commit -// functionality should be disabled. -func (st *StackTrie) Commit() (common.Hash, error) { - if st.db == nil { - return common.Hash{}, ErrCommitDisabled - } - st.hash() - if len(st.val) != 32 { - // If the node's RLP isn't 32 bytes long, the node will not - // be hashed (and committed), and instead contain the rlp-encoding of the - // node. For the top level node, we need to force the hashing+commit. - ret := make([]byte, 32) - h := newHasher(false) - defer returnHasherToPool(h) - h.sha.Reset() - h.sha.Write(st.val) - h.sha.Read(ret) - st.db.Put(ret, st.val) - return common.BytesToHash(ret), nil - } - return common.BytesToHash(st.val), nil +// For stack trie, Hash and Commit are functionally identical. +func (t *StackTrie) Commit() common.Hash { + return t.Hash() } diff --git a/trie/stacktrie_test.go b/trie/stacktrie_test.go index fb39e42525..6f93c1f53a 100644 --- a/trie/stacktrie_test.go +++ b/trie/stacktrie_test.go @@ -19,11 +19,14 @@ package trie import ( "bytes" "math/big" + "math/rand" "testing" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/trie/testutil" + "golang.org/x/exp/slices" ) func TestStackTrieInsertAndHash(t *testing.T) { @@ -166,12 +169,11 @@ func TestStackTrieInsertAndHash(t *testing.T) { {"13aa", "x___________________________3", "ff0dc70ce2e5db90ee42a4c2ad12139596b890e90eb4e16526ab38fa465b35cf"}, }, } - st := NewStackTrie(nil) for i, test := range tests { // The StackTrie does not allow Insert(), Hash(), Insert(), ... // so we will create new trie for every sequence length of inserts. for l := 1; l <= len(test); l++ { - st.Reset() + st := NewStackTrie(nil) for j := 0; j < l; j++ { kv := &test[j] if err := st.TryUpdate(common.FromHex(kv.K), []byte(kv.V)); err != nil { @@ -188,7 +190,8 @@ func TestStackTrieInsertAndHash(t *testing.T) { func TestSizeBug(t *testing.T) { st := NewStackTrie(nil) - nt, _ := New(common.Hash{}, NewDatabase(memorydb.New())) + + nt := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) leaf := common.FromHex("290decd9548b62a8d60345a988386fc84ba6bc95484008f6362f93160ef3e563") value := common.FromHex("94cf40d0d2b44f2b66e07cace1372ca42b73cf21a3") @@ -203,7 +206,7 @@ func TestSizeBug(t *testing.T) { func TestEmptyBug(t *testing.T) { st := NewStackTrie(nil) - nt, _ := New(common.Hash{}, NewDatabase(memorydb.New())) + nt := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) //leaf := common.FromHex("290decd9548b62a8d60345a988386fc84ba6bc95484008f6362f93160ef3e563") //value := common.FromHex("94cf40d0d2b44f2b66e07cace1372ca42b73cf21a3") @@ -229,7 +232,7 @@ func TestEmptyBug(t *testing.T) { func TestValLength56(t *testing.T) { st := NewStackTrie(nil) - nt, _ := New(common.Hash{}, NewDatabase(memorydb.New())) + nt := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) //leaf := common.FromHex("290decd9548b62a8d60345a988386fc84ba6bc95484008f6362f93160ef3e563") //value := common.FromHex("94cf40d0d2b44f2b66e07cace1372ca42b73cf21a3") @@ -254,7 +257,8 @@ func TestValLength56(t *testing.T) { // which causes a lot of node-within-node. This case was found via fuzzing. func TestUpdateSmallNodes(t *testing.T) { st := NewStackTrie(nil) - nt, _ := New(common.Hash{}, NewDatabase(memorydb.New())) + nt := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) + kvs := []struct { K string V string @@ -282,7 +286,8 @@ func TestUpdateSmallNodes(t *testing.T) { func TestUpdateVariableKeys(t *testing.T) { t.SkipNow() st := NewStackTrie(nil) - nt, _ := New(common.Hash{}, NewDatabase(memorydb.New())) + nt := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) + kvs := []struct { K string V string @@ -347,47 +352,86 @@ func TestStacktrieNotModifyValues(t *testing.T) { } } -// TestStacktrieSerialization tests that the stacktrie works well if we -// serialize/unserialize it a lot -func TestStacktrieSerialization(t *testing.T) { +func buildPartialTree(entries []*kv, t *testing.T) map[string]common.Hash { var ( - st = NewStackTrie(nil) - nt, _ = New(common.Hash{}, NewDatabase(memorydb.New())) - keyB = big.NewInt(1) - keyDelta = big.NewInt(1) - vals [][]byte - keys [][]byte + options = NewStackTrieOptions() + nodes = make(map[string]common.Hash) ) - getValue := func(i int) []byte { - if i%2 == 0 { // large - return crypto.Keccak256(big.NewInt(int64(i)).Bytes()) - } else { //small - return big.NewInt(int64(i)).Bytes() + var ( + first int + last = len(entries) - 1 + + noLeft bool + noRight bool + ) + // Enter split mode if there are at least two elements + if rand.Intn(5) != 0 { + for { + first = rand.Intn(len(entries)) + last = rand.Intn(len(entries)) + if first <= last { + break + } + } + if first != 0 { + noLeft = true + } + if last != len(entries)-1 { + noRight = true } } - for i := 0; i < 10; i++ { - vals = append(vals, getValue(i)) - keys = append(keys, common.BigToHash(keyB).Bytes()) - keyB = keyB.Add(keyB, keyDelta) - keyDelta.Add(keyDelta, common.Big1) - } - for i, k := range keys { - nt.TryUpdate(k, common.CopyBytes(vals[i])) + options = options.WithSkipBoundary(noLeft, noRight, nil) + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + tr := NewStackTrie(options) + + for i := first; i <= last; i++ { + tr.TryUpdate(entries[i].k, entries[i].v) } + tr.Commit() + return nodes +} + +func TestPartialStackTrie(t *testing.T) { + for round := 0; round < 100; round++ { + var ( + n = rand.Intn(100) + 1 + entries []*kv + ) + for i := 0; i < n; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testutil.RandBytes(3) + } else { + val = testutil.RandBytes(32) + } + entries = append(entries, &kv{ + k: testutil.RandBytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) - for i, k := range keys { - blob, err := st.MarshalBinary() - if err != nil { - t.Fatal(err) + var ( + nodes = make(map[string]common.Hash) + options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + ) + tr := NewStackTrie(options) + + for i := 0; i < len(entries); i++ { + tr.TryUpdate(entries[i].k, entries[i].v) } - newSt, err := NewFromBinary(blob, nil) - if err != nil { - t.Fatal(err) + tr.Commit() + + for j := 0; j < 100; j++ { + for path, hash := range buildPartialTree(entries, t) { + if nodes[path] != hash { + t.Errorf("%v, want %x, got %x", []byte(path), nodes[path], hash) + } + } } - st = newSt - st.TryUpdate(k, common.CopyBytes(vals[i])) - } - if have, want := st.Hash(), nt.Hash(); have != want { - t.Fatalf("have %#x want %#x", have, want) } } diff --git a/trie/sync.go b/trie/sync.go index 820ef29e7d..f7f45d3ae1 100644 --- a/trie/sync.go +++ b/trie/sync.go @@ -19,11 +19,14 @@ package trie import ( "errors" "fmt" + "sync" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/prque" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" ) // ErrNotRequested is returned by the trie sync when it's requested to process a @@ -39,18 +42,27 @@ var ErrAlreadyProcessed = errors.New("already processed") // memory if the node was configured with a significant number of peers. const maxFetchesPerDepth = 16384 -// request represents a scheduled or already in-flight state retrieval request. -type request struct { - path []byte // Merkle path leading to this node for prioritization - hash common.Hash // Hash of the node data content to retrieve - data []byte // Data content of the node, cached until all subtrees complete - code bool // Whether this is a code entry +var ( + // deletionGauge is the metric to track how many trie node deletions + // are performed in total during the sync process. + deletionGauge = metrics.NewRegisteredGauge("trie/sync/delete", nil) - parents []*request // Parent state nodes referencing this entry (notify all upon completion) - deps int // Number of dependencies before allowed to commit this node + // lookupGauge is the metric to track how many trie node lookups are + // performed to determine if node needs to be deleted. + lookupGauge = metrics.NewRegisteredGauge("trie/sync/lookup", nil) - callback LeafCallback // Callback to invoke if a leaf node it reached on this branch -} + // accountNodeSyncedGauge is the metric to track how many account trie + // node are written during the sync. + accountNodeSyncedGauge = metrics.NewRegisteredGauge("trie/sync/nodes/account", nil) + + // storageNodeSyncedGauge is the metric to track how many account trie + // node are written during the sync. + storageNodeSyncedGauge = metrics.NewRegisteredGauge("trie/sync/nodes/storage", nil) + + // codeSyncedGauge is the metric to track how many contract codes are + // written during the sync. + codeSyncedGauge = metrics.NewRegisteredGauge("trie/sync/codes", nil) +) // SyncPath is a path tuple identifying a particular trie node either in a single // trie (account) or a layered trie (account -> storage). @@ -85,109 +97,192 @@ func NewSyncPath(path []byte) SyncPath { return SyncPath{hexToKeybytes(path[:64]), hexToCompact(path[64:])} } -// SyncResult is a response with requested data along with it's hash. -type SyncResult struct { - Hash common.Hash // Hash of the originally unknown trie node - Data []byte // Data content of the retrieved node +// nodeRequest represents a scheduled or already in-flight trie node retrieval request. +type nodeRequest struct { + hash common.Hash // Hash of the trie node to retrieve + path []byte // Merkle path leading to this node for prioritization + data []byte // Data content of the node, cached until all subtrees complete + + parent *nodeRequest // Parent state node referencing this entry + deps int // Number of dependencies before allowed to commit this node + callback LeafCallback // Callback to invoke if a leaf node it reached on this branch +} + +// codeRequest represents a scheduled or already in-flight bytecode retrieval request. +type codeRequest struct { + hash common.Hash // Hash of the contract bytecode to retrieve + path []byte // Merkle path leading to this node for prioritization + data []byte // Data content of the node, cached until all subtrees complete + parents []*nodeRequest // Parent state nodes referencing this entry (notify all upon completion) +} + +// NodeSyncResult is a response with requested trie node along with its node path. +type NodeSyncResult struct { + Path string // Path of the originally unknown trie node + Data []byte // Data content of the retrieved trie node +} + +// CodeSyncResult is a response with requested bytecode along with its hash. +type CodeSyncResult struct { + Hash common.Hash // Hash the originally unknown bytecode + Data []byte // Data content of the retrieved bytecode +} + +// nodeOp represents an operation upon the trie node. It can either represent a +// deletion to the specific node or a node write for persisting retrieved node. +type nodeOp struct { + owner common.Hash // identifier of the trie (empty for account trie) + path []byte // path from the root to the specified node. + blob []byte // the content of the node (nil for deletion) + hash common.Hash // hash of the node content (empty for node deletion) +} + +// isDelete indicates if the operation is a database deletion. +func (op *nodeOp) isDelete() bool { + return len(op.blob) == 0 } // syncMemBatch is an in-memory buffer of successfully downloaded but not yet // persisted data items. type syncMemBatch struct { - nodes map[common.Hash][]byte // In-memory membatch of recently completed nodes - codes map[common.Hash][]byte // In-memory membatch of recently completed codes + scheme string // State scheme identifier + nodes []nodeOp // In-memory batch of recently completed/deleted nodes + codes map[common.Hash][]byte // In-memory membatch of recently completed codes } // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes. -func newSyncMemBatch() *syncMemBatch { +func newSyncMemBatch(scheme string) *syncMemBatch { return &syncMemBatch{ - nodes: make(map[common.Hash][]byte), - codes: make(map[common.Hash][]byte), + scheme: scheme, + codes: make(map[common.Hash][]byte), } } -// hasNode reports the trie node with specific hash is already cached. -func (batch *syncMemBatch) hasNode(hash common.Hash) bool { - _, ok := batch.nodes[hash] - return ok -} - // hasCode reports the contract code with specific hash is already cached. func (batch *syncMemBatch) hasCode(hash common.Hash) bool { _, ok := batch.codes[hash] return ok } +// addCode caches a contract code database write operation. +func (batch *syncMemBatch) addCode(hash common.Hash, code []byte) { + batch.codes[hash] = code +} + +// addNode caches a node database write operation. +func (batch *syncMemBatch) addNode(owner common.Hash, path []byte, blob []byte, hash common.Hash) { + batch.nodes = append(batch.nodes, nodeOp{ + owner: owner, + path: path, + blob: blob, + hash: hash, + }) +} + +// delNode caches a node database delete operation. +func (batch *syncMemBatch) delNode(owner common.Hash, path []byte) { + if batch.scheme != rawdb.PathScheme { + log.Error("Unexpected node deletion", "owner", owner, "path", path, "scheme", batch.scheme) + return // deletion is not supported in hash mode. + } + batch.nodes = append(batch.nodes, nodeOp{ + owner: owner, + path: path, + }) +} + // Sync is the main state trie synchronisation scheduler, which provides yet // unknown trie hashes to retrieve, accepts node data associated with said hashes // and reconstructs the trie step by step until all is done. type Sync struct { - database ethdb.KeyValueReader // Persistent database to check for existing entries - membatch *syncMemBatch // Memory buffer to avoid frequent database writes - nodeReqs map[common.Hash]*request // Pending requests pertaining to a trie node hash - codeReqs map[common.Hash]*request // Pending requests pertaining to a code hash - queue *prque.Prque // Priority queue with the pending requests - fetches map[int]int // Number of active fetches per trie node depth - bloom *SyncBloom // Bloom filter for fast state existence checks + scheme string // Node scheme descriptor used in database. + database ethdb.KeyValueReader // Persistent database to check for existing entries + membatch *syncMemBatch // Memory buffer to avoid frequent database writes + nodeReqs map[string]*nodeRequest // Pending requests pertaining to a trie node path + codeReqs map[common.Hash]*codeRequest // Pending requests pertaining to a code hash + queue *prque.Prque // Priority queue with the pending requests + fetches map[int]int // Number of active fetches per trie node depth + bloom *SyncBloom // Bloom filter for fast state existence checks } +// LeafCallback is a callback type invoked when a trie operation reaches a leaf +// node. +// +// The keys is a path tuple identifying a particular trie node either in a single +// trie (account) or a layered trie (account -> storage). Each key in the tuple +// is in the raw format(32 bytes). +// +// The path is a composite hexary path identifying the trie node. All the key +// bytes are converted to the hexary nibbles and composited with the parent path +// if the trie node is in a layered trie. +// +// It's used by state sync and commit to allow handling external references +// between account and storage tries. And also it's used in the state healing +// for extracting the raw states(leaf nodes) with corresponding paths. +type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Hash, parentPath []byte) error + // NewSync creates a new trie data download scheduler. -func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback, bloom *SyncBloom) *Sync { +func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback, bloom *SyncBloom, scheme string) *Sync { ts := &Sync{ + scheme: scheme, database: database, - membatch: newSyncMemBatch(), - nodeReqs: make(map[common.Hash]*request), - codeReqs: make(map[common.Hash]*request), + membatch: newSyncMemBatch(scheme), + nodeReqs: make(map[string]*nodeRequest), + codeReqs: make(map[common.Hash]*codeRequest), queue: prque.New(nil), fetches: make(map[int]int), bloom: bloom, } - ts.AddSubTrie(root, nil, common.Hash{}, callback) + ts.AddSubTrie(root, nil, common.Hash{}, nil, callback) return ts } -// AddSubTrie registers a new trie to the sync code, rooted at the designated parent. -func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, callback LeafCallback) { +// AddSubTrie registers a new trie to the sync code, rooted at the designated +// parent for completion tracking. The given path is a unique node path in +// hex format and contain all the parent path if it's layered trie node. +func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, parentPath []byte, callback LeafCallback) { // Short circuit if the trie is empty or already known if root == emptyRoot { return } - if s.membatch.hasNode(root) { - return - } if s.bloom == nil || s.bloom.Contains(root[:]) { // Bloom filter says this might be a duplicate, double check. // If database says yes, then at least the trie node is present // and we hold the assumption that it's NOT legacy contract code. - blob := rawdb.ReadTrieNode(s.database, root) - if len(blob) > 0 { + owner, inner := ResolvePath(path) + exist, inconsistent := s.hasNode(owner, inner, root) + if exist { + // The entire subtrie is already present in the database. return + } else if inconsistent { + // There is a pre-existing node with the wrong hash in DB, remove it. + s.membatch.delNode(owner, inner) } // False positive, bump fault meter bloomFaultMeter.Mark(1) } // Assemble the new sub-trie sync request - req := &request{ - path: path, + req := &nodeRequest{ hash: root, + path: path, callback: callback, } // If this sub-trie has a designated parent, link them together if parent != (common.Hash{}) { - ancestor := s.nodeReqs[parent] + ancestor := s.nodeReqs[string(parentPath)] if ancestor == nil { panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent)) } ancestor.deps++ - req.parents = append(req.parents, ancestor) + req.parent = ancestor } - s.schedule(req) + s.scheduleNodeRequest(req) } // AddCodeEntry schedules the direct retrieval of a contract code that should not // be interpreted as a trie node, but rather accepted and stored into the database // as is. -func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash) { +func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash, parentPath []byte) { // Short circuit if the entry is empty or already known if hash == emptyState { return @@ -209,30 +304,29 @@ func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash) { bloomFaultMeter.Mark(1) } // Assemble the new sub-trie sync request - req := &request{ + req := &codeRequest{ path: path, hash: hash, - code: true, } // If this sub-trie has a designated parent, link them together if parent != (common.Hash{}) { - ancestor := s.nodeReqs[parent] // the parent of codereq can ONLY be nodereq + ancestor := s.nodeReqs[string(parentPath)] // the parent of codereq can ONLY be nodereq if ancestor == nil { panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent)) } ancestor.deps++ req.parents = append(req.parents, ancestor) } - s.schedule(req) + s.scheduleCodeRequest(req) } // Missing retrieves the known missing nodes from the trie for retrieval. To aid // both eth/6x style fast sync and snap/1x style state sync, the paths of trie // nodes are returned too, as well as separate hash list for codes. -func (s *Sync) Missing(max int) (nodes []common.Hash, paths []SyncPath, codes []common.Hash) { +func (s *Sync) Missing(max int) ([]string, []common.Hash, []common.Hash) { var ( + nodePaths []string nodeHashes []common.Hash - nodePaths []SyncPath codeHashes []common.Hash ) for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) { @@ -248,83 +342,123 @@ func (s *Sync) Missing(max int) (nodes []common.Hash, paths []SyncPath, codes [] s.queue.Pop() s.fetches[depth]++ - hash := item.(common.Hash) - if req, ok := s.nodeReqs[hash]; ok { - nodeHashes = append(nodeHashes, hash) - nodePaths = append(nodePaths, NewSyncPath(req.path)) - } else { - codeHashes = append(codeHashes, hash) + switch item := item.(type) { + case common.Hash: + codeHashes = append(codeHashes, item) + case string: + req, ok := s.nodeReqs[item] + if !ok { + log.Error("Missing node request", "path", item) + continue // System very wrong, shouldn't happen + } + nodePaths = append(nodePaths, item) + nodeHashes = append(nodeHashes, req.hash) } } - return nodeHashes, nodePaths, codeHashes + return nodePaths, nodeHashes, codeHashes } -// Process injects the received data for requested item. Note it can +// ProcessCode injects the received data for requested item. Note it can // happpen that the single response commits two pending requests(e.g. // there are two requests one for code and one for node but the hash // is same). In this case the second response for the same hash will // be treated as "non-requested" item or "already-processed" item but // there is no downside. -func (s *Sync) Process(result SyncResult) error { - // If the item was not requested either for code or node, bail out - if s.nodeReqs[result.Hash] == nil && s.codeReqs[result.Hash] == nil { +func (s *Sync) ProcessCode(result CodeSyncResult) error { + // If the code was not requested or it's already processed, bail out + req := s.codeReqs[result.Hash] + if req == nil { return ErrNotRequested } - // There is an pending code request for this data, commit directly - var filled bool - if req := s.codeReqs[result.Hash]; req != nil && req.data == nil { - filled = true - req.data = result.Data - s.commit(req) - } - // There is an pending node request for this data, fill it. - if req := s.nodeReqs[result.Hash]; req != nil && req.data == nil { - filled = true - // Decode the node data content and update the request - node, err := decodeNode(result.Hash[:], result.Data) - if err != nil { - return err - } - req.data = result.Data + if req.data != nil { + return ErrAlreadyProcessed + } + req.data = result.Data + return s.commitCodeRequest(req) +} - // Create and schedule a request for all the children nodes - requests, err := s.children(req, node) - if err != nil { - return err - } - if len(requests) == 0 && req.deps == 0 { - s.commit(req) - } else { - req.deps += len(requests) - for _, child := range requests { - s.schedule(child) - } - } +// ProcessNode injects the received data for requested item. Note it can +// happen that the single response commits two pending requests(e.g. +// there are two requests one for code and one for node but the hash +// is same). In this case the second response for the same hash will +// be treated as "non-requested" item or "already-processed" item but +// there is no downside. +func (s *Sync) ProcessNode(result NodeSyncResult) error { + // If the trie node was not requested or it's already processed, bail out + req := s.nodeReqs[result.Path] + if req == nil { + return ErrNotRequested } - if !filled { + if req.data != nil { return ErrAlreadyProcessed } + // Decode the node data content and update the request + node, err := decodeNode(req.hash.Bytes(), result.Data) + if err != nil { + return err + } + req.data = result.Data + + // Create and schedule a request for all the children nodes + requests, err := s.children(req, node) + if err != nil { + return err + } + if len(requests) == 0 && req.deps == 0 { + s.commitNodeRequest(req) + } else { + req.deps += len(requests) + for _, child := range requests { + s.scheduleNodeRequest(child) + } + } return nil } // Commit flushes the data stored in the internal membatch out to persistent -// storage, returning any occurred error. +// storage, returning any occurred error. The whole data set will be flushed +// in an atomic database batch. func (s *Sync) Commit(dbw ethdb.Batch) error { - // Dump the membatch into a database dbw - for key, value := range s.membatch.nodes { - rawdb.WriteTrieNode(dbw, key, value) + var ( + account int + storage int + ) + // Flush the pending node writes into database batch. + for _, op := range s.membatch.nodes { + if op.isDelete() { + // node deletion is only supported in path mode. + if op.owner == (common.Hash{}) { + rawdb.DeleteAccountTrieNode(dbw, op.path) + } else { + rawdb.DeleteStorageTrieNode(dbw, op.owner, op.path) + } + deletionGauge.Inc(1) + } else { + if op.owner == (common.Hash{}) { + account += 1 + } else { + storage += 1 + } + rawdb.WriteTrieNode(dbw, op.owner, op.path, op.hash, op.blob, s.scheme) + } + hash := op.hash if s.bloom != nil { - s.bloom.Add(key[:]) + s.bloom.Add(hash[:]) } } - for key, value := range s.membatch.codes { - rawdb.WriteCode(dbw, key, value) + accountNodeSyncedGauge.Inc(int64(account)) + storageNodeSyncedGauge.Inc(int64(storage)) + + // Flush the pending code writes into database batch. + for hash, value := range s.membatch.codes { + rawdb.WriteCode(dbw, hash, value) if s.bloom != nil { - s.bloom.Add(key[:]) + s.bloom.Add(hash[:]) } } - // Drop the membatch data and return - s.membatch = newSyncMemBatch() + codeSyncedGauge.Inc(int64(len(s.membatch.codes))) + + s.membatch = newSyncMemBatch(s.scheme) // reset the batch return nil } @@ -336,23 +470,31 @@ func (s *Sync) Pending() int { // schedule inserts a new state retrieval request into the fetch queue. If there // is already a pending request for this node, the new request will be discarded // and only a parent reference added to the old one. -func (s *Sync) schedule(req *request) { - var reqset = s.nodeReqs - if req.code { - reqset = s.codeReqs +func (s *Sync) scheduleNodeRequest(req *nodeRequest) { + s.nodeReqs[string(req.path)] = req + + // Schedule the request for future retrieval. This queue is shared + // by both node requests and code requests. + prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents + for i := 0; i < 14 && i < len(req.path); i++ { + prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order } + s.queue.Push(string(req.path), prio) +} + +// schedule inserts a new state retrieval request into the fetch queue. If there +// is already a pending request for this node, the new request will be discarded +// and only a parent reference added to the old one. +func (s *Sync) scheduleCodeRequest(req *codeRequest) { // If we're already requesting this node, add a new reference and stop - if old, ok := reqset[req.hash]; ok { + if old, ok := s.codeReqs[req.hash]; ok { old.parents = append(old.parents, req.parents...) return } - reqset[req.hash] = req + s.codeReqs[req.hash] = req // Schedule the request for future retrieval. This queue is shared - // by both node requests and code requests. It can happen that there - // is a trie node and code has same hash. In this case two elements - // with same hash and same or different depth will be pushed. But it's - // ok the worst case is the second response will be treated as duplicated. + // by both node requests and code requests. prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents for i := 0; i < 14 && i < len(req.path); i++ { prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order @@ -362,7 +504,7 @@ func (s *Sync) schedule(req *request) { // children retrieves all the missing children of a state trie entry for future // retrieval scheduling. -func (s *Sync) children(req *request, object node) ([]*request, error) { +func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) { // Gather all the children of the node, irrelevant whether known or not type child struct { path []byte @@ -380,6 +522,41 @@ func (s *Sync) children(req *request, object node) ([]*request, error) { node: node.Val, path: append(append([]byte(nil), req.path...), key...), }} + // Mark all internal nodes between shortNode and its **in disk** + // child as invalid. This is essential in the case of path mode + // scheme; otherwise, state healing might overwrite existing child + // nodes silently while leaving a dangling parent node within the + // range of this internal path on disk and the persistent state + // ends up with a very weird situation that nodes on the same path + // are not inconsistent while they all present in disk. This property + // would break the guarantee for state healing. + // + // While it's possible for this shortNode to overwrite a previously + // existing full node, the other branches of the fullNode can be + // retained as they are not accessible with the new shortNode, and + // also the whole sub-trie is still untouched and complete. + // + // This step is only necessary for path mode, as there is no deletion + // in hash mode at all. + if _, ok := node.Val.(hashNode); ok && s.scheme == rawdb.PathScheme { + owner, inner := ResolvePath(req.path) + for i := 1; i < len(key); i++ { + // While checking for a non-existent item in Pebble can be less efficient + // without a bloom filter, the relatively low frequency of lookups makes + // the performance impact negligible. + var exists bool + if owner == (common.Hash{}) { + exists = rawdb.ExistsAccountTrieNode(s.database, append(inner, key[:i]...)) + } else { + exists = rawdb.ExistsStorageTrieNode(s.database, owner, append(inner, key[:i]...)) + } + if exists { + s.membatch.delNode(owner, append(inner, key[:i]...)) + log.Debug("Detected dangling node", "owner", owner, "path", append(inner, key[:i]...)) + } + } + lookupGauge.Inc(int64(len(key) - 1)) + } case *fullNode: for i := 0; i < 17; i++ { if node.Children[i] != nil { @@ -393,7 +570,8 @@ func (s *Sync) children(req *request, object node) ([]*request, error) { panic(fmt.Sprintf("unknown node: %+v", node)) } // Iterate over the children, and request all unknown ones - requests := make([]*request, 0, len(children)) + requests := make([]*nodeRequest, 0, len(children)) + var batchMu sync.Mutex for _, child := range children { // Notify any external watcher of a new key/value node if req.callback != nil { @@ -405,33 +583,39 @@ func (s *Sync) children(req *request, object node) ([]*request, error) { paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength])) paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:])) } - if err := req.callback(paths, child.path, node, req.hash); err != nil { + if err := req.callback(paths, child.path, node, req.hash, req.path); err != nil { return nil, err } } } - // If the child references another node, resolve or schedule + // If the child references another node, resolve or schedule. + // We check all children concurrently. if node, ok := (child.node).(hashNode); ok { - // Try to resolve the node from the local database + path := child.path hash := common.BytesToHash(node) - if s.membatch.hasNode(hash) { - continue - } if s.bloom == nil || s.bloom.Contains(node) { // Bloom filter says this might be a duplicate, double check. // If database says yes, then at least the trie node is present // and we hold the assumption that it's NOT legacy contract code. - if blob := rawdb.ReadTrieNode(s.database, hash); len(blob) > 0 { + owner, inner := ResolvePath(path) + exist, inconsistent := s.hasNode(owner, inner, hash) + if exist { continue + } else if inconsistent { + // There is a pre-existing node with the wrong hash in DB, remove it. + batchMu.Lock() + s.membatch.delNode(owner, inner) + batchMu.Unlock() } + // False positive, bump fault meter bloomFaultMeter.Mark(1) } // Locally unknown node, schedule for retrieval - requests = append(requests, &request{ - path: child.path, + requests = append(requests, &nodeRequest{ + path: path, hash: hash, - parents: []*request{req}, + parent: req, callback: req.callback, }) } @@ -442,25 +626,79 @@ func (s *Sync) children(req *request, object node) ([]*request, error) { // commit finalizes a retrieval request and stores it into the membatch. If any // of the referencing parent requests complete due to this commit, they are also // committed themselves. -func (s *Sync) commit(req *request) (err error) { +func (s *Sync) commitNodeRequest(req *nodeRequest) error { // Write the node content to the membatch - if req.code { - s.membatch.codes[req.hash] = req.data - delete(s.codeReqs, req.hash) - s.fetches[len(req.path)]-- - } else { - s.membatch.nodes[req.hash] = req.data - delete(s.nodeReqs, req.hash) - s.fetches[len(req.path)]-- + owner, path := ResolvePath(req.path) + s.membatch.addNode(owner, path, req.data, req.hash) + + // Removed the completed node request + delete(s.nodeReqs, string(req.path)) + s.fetches[len(req.path)]-- + + // Check parent for completion + if req.parent != nil { + req.parent.deps-- + if req.parent.deps == 0 { + if err := s.commitNodeRequest(req.parent); err != nil { + return err + } + } } + return nil +} + +// commit finalizes a retrieval request and stores it into the membatch. If any +// of the referencing parent requests complete due to this commit, they are also +// committed themselves. +func (s *Sync) commitCodeRequest(req *codeRequest) error { + // Write the node content to the membatch + s.membatch.addCode(req.hash, req.data) + + // Removed the completed code request + delete(s.codeReqs, req.hash) + s.fetches[len(req.path)]-- + // Check all parents for completion for _, parent := range req.parents { parent.deps-- if parent.deps == 0 { - if err := s.commit(parent); err != nil { + if err := s.commitNodeRequest(parent); err != nil { return err } } } return nil } + +// hasNode reports whether the specified trie node is present in the database. +// 'exists' is true when the node exists in the database and matches the given root +// hash. The 'inconsistent' return value is true when the node exists but does not +// match the expected hash. +func (s *Sync) hasNode(owner common.Hash, path []byte, hash common.Hash) (exists bool, inconsistent bool) { + // If node is running with hash scheme, check the presence with node hash. + if s.scheme == rawdb.HashScheme { + return rawdb.HasLegacyTrieNode(s.database, hash), false + } + // If node is running with path scheme, check the presence with node path. + var blob []byte + var dbHash common.Hash + if owner == (common.Hash{}) { + blob, dbHash = rawdb.ReadAccountTrieNode(s.database, path) + } else { + blob, dbHash = rawdb.ReadStorageTrieNode(s.database, owner, path) + } + exists = hash == dbHash + inconsistent = !exists && len(blob) != 0 + return exists, inconsistent +} + +// ResolvePath resolves the provided composite node path by separating the +// path in account trie if it's existent. +func ResolvePath(path []byte) (common.Hash, []byte) { + var owner common.Hash + if len(path) >= 2*common.HashLength { + owner = common.BytesToHash(hexToKeybytes(path[:2*common.HashLength])) + path = path[2*common.HashLength:] + } + return owner, path +} diff --git a/trie/sync_test.go b/trie/sync_test.go index cb3283875d..75eb5809a2 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -18,18 +18,25 @@ package trie import ( "bytes" + "fmt" "testing" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/trie/trienode" ) // makeTestTrie create a sample test trie to test node-wise reconstruction. -func makeTestTrie() (*Database, *SecureTrie, map[string][]byte) { +func makeTestTrie(scheme string) (ethdb.Database, *Database, *SecureTrie, map[string][]byte) { // Create an empty trie - triedb := NewDatabase(memorydb.New()) - trie, _ := NewSecure(common.Hash{}, triedb) + db := rawdb.NewMemoryDatabase() + + triedb := newTestDatabase(db, scheme) + trie, _ := NewSecure(TrieID(types.EmptyRootHash), triedb) // Fill it with some arbitrary data content := make(map[string][]byte) @@ -50,52 +57,98 @@ func makeTestTrie() (*Database, *SecureTrie, map[string][]byte) { trie.Update(key, val) } } - trie.Commit(nil) + root, nodes, err := trie.Commit(false) + if err != nil { + panic(fmt.Errorf("failed to commit trie: %v", err)) + } + if err := triedb.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil); err != nil { + panic(fmt.Errorf("failed to commit db %v", err)) + } + if err := triedb.Commit(root, false); err != nil { + panic(err) + } + + // Re-create the trie based on the new state + trie, _ = NewSecure(TrieID(root), triedb) - // Return the generated trie - return triedb, trie, content + return db, triedb, trie, content } // checkTrieContents cross references a reconstructed trie with an expected data // content map. -func checkTrieContents(t *testing.T, db *Database, root []byte, content map[string][]byte) { +func checkTrieContents(t *testing.T, db ethdb.Database, scheme string, root []byte, content map[string][]byte, rawTrie bool) { // Check root availability and trie contents - trie, err := NewSecure(common.BytesToHash(root), db) - if err != nil { - t.Fatalf("failed to create trie at %x: %v", root, err) - } - if err := checkTrieConsistency(db, common.BytesToHash(root)); err != nil { + ndb := newTestDatabase(db, scheme) + if err := checkTrieConsistency(db, scheme, common.BytesToHash(root), rawTrie); err != nil { t.Fatalf("inconsistent trie at %x: %v", root, err) } + type reader interface { + Get(key []byte) []byte + } + var r reader + if rawTrie { + trie, err := New(TrieID(common.BytesToHash(root)), ndb) + if err != nil { + t.Fatalf("failed to create trie at %x: %v", root, err) + } + r = trie + } else { + trie, err := NewSecure(TrieID(common.BytesToHash(root)), ndb) + if err != nil { + t.Fatalf("failed to create trie at %x: %v", root, err) + } + r = trie + } for key, val := range content { - if have := trie.Get([]byte(key)); !bytes.Equal(have, val) { + if have := r.Get([]byte(key)); !bytes.Equal(have, val) { t.Errorf("entry %x: content mismatch: have %x, want %x", key, have, val) } } } // checkTrieConsistency checks that all nodes in a trie are indeed present. -func checkTrieConsistency(db *Database, root common.Hash) error { - // Create and iterate a trie rooted in a subnode - trie, err := NewSecure(root, db) - if err != nil { - return nil // Consider a non existent state consistent +func checkTrieConsistency(db ethdb.Database, scheme string, root common.Hash, rawTrie bool) error { + ndb := newTestDatabase(db, scheme) + var it NodeIterator + if rawTrie { + trie, err := New(TrieID(root), ndb) + if err != nil { + return nil // Consider a non existent state consistent + } + it = trie.MustNodeIterator(nil) + } else { + trie, err := NewSecure(TrieID(root), ndb) + if err != nil { + return nil // Consider a non existent state consistent + } + it = trie.MustNodeIterator(nil) } - it := trie.NodeIterator(nil) for it.Next(true) { } return it.Error() } +// trieElement represents the element in the state trie(bytecode or trie node). +type trieElement struct { + path string + hash common.Hash + syncPath SyncPath +} + // Tests that an empty trie is not scheduled for syncing. func TestEmptySync(t *testing.T) { - dbA := NewDatabase(memorydb.New()) - dbB := NewDatabase(memorydb.New()) - emptyA, _ := New(common.Hash{}, dbA) - emptyB, _ := New(emptyRoot, dbB) - - for i, trie := range []*Trie{emptyA, emptyB} { - sync := NewSync(trie.Hash(), memorydb.New(), nil, NewSyncBloom(1, memorydb.New())) + dbA := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) + dbB := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) + dbC := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.PathScheme) + dbD := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.PathScheme) + + emptyA := NewEmpty(dbA) + emptyB, _ := New(TrieID(emptyRoot), dbB) + emptyC := NewEmpty(dbC) + emptyD, _ := New(TrieID(types.EmptyRootHash), dbD) + + for i, trie := range []*Trie{emptyA, emptyB, emptyC, emptyD} { + sync := NewSync(trie.Hash(), memorydb.New(), nil, NewSyncBloom(1, memorydb.New()), []*Database{dbA, dbB, dbC, dbD}[i].Scheme()) if nodes, paths, codes := sync.Missing(1); len(nodes) != 0 || len(paths) != 0 || len(codes) != 0 { t.Errorf("test %d: content requested for empty trie: %v, %v, %v", i, nodes, paths, codes) } @@ -104,49 +157,62 @@ func TestEmptySync(t *testing.T) { // Tests that given a root hash, a trie can sync iteratively on a single thread, // requesting retrieval tasks and returning all of them in one go. -func TestIterativeSyncIndividual(t *testing.T) { testIterativeSync(t, 1, false) } -func TestIterativeSyncBatched(t *testing.T) { testIterativeSync(t, 100, false) } -func TestIterativeSyncIndividualByPath(t *testing.T) { testIterativeSync(t, 1, true) } -func TestIterativeSyncBatchedByPath(t *testing.T) { testIterativeSync(t, 100, true) } +func TestIterativeSync(t *testing.T) { + testIterativeSync(t, 1, false, rawdb.HashScheme) + testIterativeSync(t, 100, false, rawdb.HashScheme) + testIterativeSync(t, 1, true, rawdb.HashScheme) + testIterativeSync(t, 100, true, rawdb.HashScheme) + testIterativeSync(t, 1, false, rawdb.PathScheme) + testIterativeSync(t, 100, false, rawdb.PathScheme) + testIterativeSync(t, 1, true, rawdb.PathScheme) + testIterativeSync(t, 100, true, rawdb.PathScheme) +} -func testIterativeSync(t *testing.T, count int, bypath bool) { +func testIterativeSync(t *testing.T, count int, bypath bool, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler - diskdb := memorydb.New() - triedb := NewDatabase(diskdb) - sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb)) - - nodes, paths, codes := sched.Missing(count) - var ( - hashQueue []common.Hash - pathQueue []SyncPath - ) - if !bypath { - hashQueue = append(append(hashQueue[:0], nodes...), codes...) - } else { - hashQueue = append(hashQueue[:0], codes...) - pathQueue = append(pathQueue[:0], paths...) + diskdb := rawdb.NewMemoryDatabase() + sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb), srcDb.Scheme()) + + // The code requests are ignored here since there is no code + // at the testing trie. + paths, nodes, _ := sched.Missing(count) + var elements []trieElement + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) } - for len(hashQueue)+len(pathQueue) > 0 { - results := make([]SyncResult, len(hashQueue)+len(pathQueue)) - for i, hash := range hashQueue { - data, err := srcDb.Node(hash) - if err != nil { - t.Fatalf("failed to retrieve node data for hash %x: %v", hash, err) + for len(elements) > 0 { + results := make([]NodeSyncResult, len(elements)) + if !bypath { + for i, element := range elements { + owner, inner := ResolvePath([]byte(element.path)) + reader, err := srcDb.Reader(srcTrie.Hash()) + if err != nil { + t.Fatalf("failed to create reader for trie %x: %v", srcTrie.Hash(), err) + } + data, err := reader.Node(owner, inner, element.hash) + if err != nil { + t.Fatalf("failed to retrieve node data for hash %x: %v", element.hash, err) + } + results[i] = NodeSyncResult{element.path, data} } - results[i] = SyncResult{hash, data} - } - for i, path := range pathQueue { - data, _, err := srcTrie.TryGetNode(path[0]) - if err != nil { - t.Fatalf("failed to retrieve node data for path %x: %v", path, err) + } else { + for i, element := range elements { + data, _, err := srcTrie.TryGetNode(element.syncPath[len(element.syncPath)-1]) + if err != nil { + t.Fatalf("failed to retrieve node data for path %x: %v", element.path, err) + } + results[i] = NodeSyncResult{element.path, data} } - results[len(hashQueue)+i] = SyncResult{crypto.Keccak256Hash(data), data} } for _, result := range results { - if err := sched.Process(result); err != nil { + if err := sched.ProcessNode(result); err != nil { t.Fatalf("failed to process result %v", err) } } @@ -156,44 +222,62 @@ func testIterativeSync(t *testing.T, count int, bypath bool) { } batch.Write() - nodes, paths, codes = sched.Missing(count) - if !bypath { - hashQueue = append(append(hashQueue[:0], nodes...), codes...) - } else { - hashQueue = append(hashQueue[:0], codes...) - pathQueue = append(pathQueue[:0], paths...) + paths, nodes, _ = sched.Missing(count) + elements = elements[:0] + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) } // Tests that the trie scheduler can correctly reconstruct the state even if only // partial results are returned, and the others sent only later. func TestIterativeDelayedSync(t *testing.T) { - // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + testIterativeDelayedSync(t, rawdb.HashScheme) + testIterativeDelayedSync(t, rawdb.PathScheme) +} +func testIterativeDelayedSync(t *testing.T, scheme string) { + // Create a random trie to copy + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler - diskdb := memorydb.New() - triedb := NewDatabase(diskdb) - sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb)) - - nodes, _, codes := sched.Missing(10000) - queue := append(append([]common.Hash{}, nodes...), codes...) - - for len(queue) > 0 { + diskdb := rawdb.NewMemoryDatabase() + sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb), srcDb.Scheme()) + + // The code requests are ignored here since there is no code + // at the testing trie. + paths, nodes, _ := sched.Missing(10000) + var elements []trieElement + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + } + for len(elements) > 0 { // Sync only half of the scheduled nodes - results := make([]SyncResult, len(queue)/2+1) - for i, hash := range queue[:len(results)] { - data, err := srcDb.Node(hash) + results := make([]NodeSyncResult, len(elements)/2+1) + for i, element := range elements[:len(results)] { + owner, inner := ResolvePath([]byte(element.path)) + reader, err := srcDb.Reader(srcTrie.Hash()) + if err != nil { + t.Fatalf("failed to create reader for trie %x: %v", srcTrie.Hash(), err) + } + data, err := reader.Node(owner, inner, element.hash) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) } - results[i] = SyncResult{hash, data} + results[i] = NodeSyncResult{element.path, data} } for _, result := range results { - if err := sched.Process(result); err != nil { + if err := sched.ProcessNode(result); err != nil { t.Fatalf("failed to process result %v", err) } } @@ -203,46 +287,67 @@ func TestIterativeDelayedSync(t *testing.T) { } batch.Write() - nodes, _, codes = sched.Missing(10000) - queue = append(append(queue[len(results):], nodes...), codes...) + paths, nodes, _ = sched.Missing(10000) + elements = elements[len(results):] + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) } // Tests that given a root hash, a trie can sync iteratively on a single thread, // requesting retrieval tasks and returning all of them in one go, however in a // random order. -func TestIterativeRandomSyncIndividual(t *testing.T) { testIterativeRandomSync(t, 1) } -func TestIterativeRandomSyncBatched(t *testing.T) { testIterativeRandomSync(t, 100) } +func TestIterativeRandomSyncIndividual(t *testing.T) { + testIterativeRandomSync(t, 1, rawdb.HashScheme) + testIterativeRandomSync(t, 100, rawdb.HashScheme) + testIterativeRandomSync(t, 1, rawdb.PathScheme) + testIterativeRandomSync(t, 100, rawdb.PathScheme) +} -func testIterativeRandomSync(t *testing.T, count int) { +func testIterativeRandomSync(t *testing.T, count int, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler - diskdb := memorydb.New() - triedb := NewDatabase(diskdb) - sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb)) - - queue := make(map[common.Hash]struct{}) - nodes, _, codes := sched.Missing(count) - for _, hash := range append(nodes, codes...) { - queue[hash] = struct{}{} + diskdb := rawdb.NewMemoryDatabase() + sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb), srcDb.Scheme()) + + // The code requests are ignored here since there is no code + // at the testing trie. + paths, nodes, _ := sched.Missing(count) + queue := make(map[string]trieElement) + for i, path := range paths { + queue[path] = trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + } } for len(queue) > 0 { // Fetch all the queued nodes in a random order - results := make([]SyncResult, 0, len(queue)) - for hash := range queue { - data, err := srcDb.Node(hash) + results := make([]NodeSyncResult, 0, len(queue)) + for path, element := range queue { + owner, inner := ResolvePath([]byte(element.path)) + reader, err := srcDb.Reader(srcTrie.Hash()) + if err != nil { + t.Fatalf("failed to create reader for trie %x: %v", srcTrie.Hash(), err) + } + data, err := reader.Node(owner, inner, element.hash) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) } - results = append(results, SyncResult{hash, data}) + results = append(results, NodeSyncResult{path, data}) } // Feed the retrieved results back and queue new tasks for _, result := range results { - if err := sched.Process(result); err != nil { + if err := sched.ProcessNode(result); err != nil { t.Fatalf("failed to process result %v", err) } } @@ -252,41 +357,60 @@ func testIterativeRandomSync(t *testing.T, count int) { } batch.Write() - queue = make(map[common.Hash]struct{}) - nodes, _, codes = sched.Missing(count) - for _, hash := range append(nodes, codes...) { - queue[hash] = struct{}{} + paths, nodes, _ = sched.Missing(count) + queue = make(map[string]trieElement) + for i, path := range paths { + queue[path] = trieElement{ + path: path, + hash: nodes[i], + syncPath: NewSyncPath([]byte(path)), + } } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) } // Tests that the trie scheduler can correctly reconstruct the state even if only // partial results are returned (Even those randomly), others sent only later. func TestIterativeRandomDelayedSync(t *testing.T) { + testIterativeRandomDelayedSync(t, rawdb.HashScheme) + testIterativeRandomDelayedSync(t, rawdb.PathScheme) +} + +func testIterativeRandomDelayedSync(t *testing.T, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler - diskdb := memorydb.New() - triedb := NewDatabase(diskdb) - sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb)) - - queue := make(map[common.Hash]struct{}) - nodes, _, codes := sched.Missing(10000) - for _, hash := range append(nodes, codes...) { - queue[hash] = struct{}{} + diskdb := rawdb.NewMemoryDatabase() + sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb), srcDb.Scheme()) + + // The code requests are ignored here since there is no code + // at the testing trie. + paths, nodes, _ := sched.Missing(10000) + queue := make(map[string]trieElement) + for i, path := range paths { + queue[path] = trieElement{ + path: path, + hash: nodes[i], + syncPath: NewSyncPath([]byte(path)), + } } for len(queue) > 0 { // Sync only half of the scheduled nodes, even those in random order - results := make([]SyncResult, 0, len(queue)/2+1) - for hash := range queue { - data, err := srcDb.Node(hash) + results := make([]NodeSyncResult, 0, len(queue)/2+1) + for path, element := range queue { + owner, inner := ResolvePath([]byte(element.path)) + reader, err := srcDb.Reader(srcTrie.Hash()) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to create reader for trie %x: %v", srcTrie.Hash(), err) } - results = append(results, SyncResult{hash, data}) + data, err := reader.Node(owner, inner, element.hash) + if err != nil { + t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) + } + results = append(results, NodeSyncResult{path, data}) if len(results) >= cap(results) { break @@ -294,7 +418,7 @@ func TestIterativeRandomDelayedSync(t *testing.T) { } // Feed the retrieved results back and queue new tasks for _, result := range results { - if err := sched.Process(result); err != nil { + if err := sched.ProcessNode(result); err != nil { t.Fatalf("failed to process result %v", err) } } @@ -304,48 +428,71 @@ func TestIterativeRandomDelayedSync(t *testing.T) { } batch.Write() for _, result := range results { - delete(queue, result.Hash) - } - nodes, _, codes = sched.Missing(10000) - for _, hash := range append(nodes, codes...) { - queue[hash] = struct{}{} + delete(queue, result.Path) + } + paths, nodes, _ = sched.Missing(10000) + for i, path := range paths { + queue[path] = trieElement{ + path: path, + hash: nodes[i], + syncPath: NewSyncPath([]byte(path)), + } } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) } // Tests that a trie sync will not request nodes multiple times, even if they // have such references. func TestDuplicateAvoidanceSync(t *testing.T) { + testDuplicateAvoidanceSync(t, rawdb.HashScheme) + testDuplicateAvoidanceSync(t, rawdb.PathScheme) +} + +func testDuplicateAvoidanceSync(t *testing.T, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler - diskdb := memorydb.New() - triedb := NewDatabase(diskdb) - sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb)) - - nodes, _, codes := sched.Missing(0) - queue := append(append([]common.Hash{}, nodes...), codes...) + diskdb := rawdb.NewMemoryDatabase() + sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb), srcDb.Scheme()) + + // The code requests are ignored here since there is no code + // at the testing trie. + paths, nodes, _ := sched.Missing(0) + var elements []trieElement + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + } requested := make(map[common.Hash]struct{}) - for len(queue) > 0 { - results := make([]SyncResult, len(queue)) - for i, hash := range queue { - data, err := srcDb.Node(hash) + for len(elements) > 0 { + results := make([]NodeSyncResult, len(elements)) + for i, element := range elements { + owner, inner := ResolvePath([]byte(element.path)) + reader, err := srcDb.Reader(srcTrie.Hash()) + if err != nil { + t.Fatalf("failed to create reader for trie %x: %v", srcTrie.Hash(), err) + } + + data, err := reader.Node(owner, inner, element.hash) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) } - if _, ok := requested[hash]; ok { - t.Errorf("hash %x already requested once", hash) + if _, ok := requested[element.hash]; ok { + t.Errorf("hash %x already requested once", element.hash) } - requested[hash] = struct{}{} + requested[element.hash] = struct{}{} - results[i] = SyncResult{hash, data} + results[i] = NodeSyncResult{element.path, data} } for _, result := range results { - if err := sched.Process(result); err != nil { + if err := sched.ProcessNode(result); err != nil { t.Fatalf("failed to process result %v", err) } } @@ -355,41 +502,70 @@ func TestDuplicateAvoidanceSync(t *testing.T) { } batch.Write() - nodes, _, codes = sched.Missing(0) - queue = append(append(queue[:0], nodes...), codes...) + paths, nodes, _ = sched.Missing(0) + elements = elements[:0] + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) } // Tests that at any point in time during a sync, only complete sub-tries are in // the database. -func TestIncompleteSync(t *testing.T) { +func TestIncompleteSyncHash(t *testing.T) { + testIncompleteSync(t, rawdb.HashScheme) + testIncompleteSync(t, rawdb.PathScheme) +} + +func testIncompleteSync(t *testing.T, scheme string) { + // Create a random trie to copy - srcDb, srcTrie, _ := makeTestTrie() + _, srcDb, srcTrie, _ := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler - diskdb := memorydb.New() - triedb := NewDatabase(diskdb) - sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb)) - - var added []common.Hash + diskdb := rawdb.NewMemoryDatabase() + sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb), srcDb.Scheme()) - nodes, _, codes := sched.Missing(1) - queue := append(append([]common.Hash{}, nodes...), codes...) - for len(queue) > 0 { + // The code requests are ignored here since there is no code + // at the testing trie. + var ( + addedKeys []string + addedHashes []common.Hash + elements []trieElement + root = srcTrie.Hash() + ) + paths, nodes, _ := sched.Missing(1) + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + } + for len(elements) > 0 { // Fetch a batch of trie nodes - results := make([]SyncResult, len(queue)) - for i, hash := range queue { - data, err := srcDb.Node(hash) + results := make([]NodeSyncResult, len(elements)) + for i, element := range elements { + owner, inner := ResolvePath([]byte(element.path)) + reader, err := srcDb.Reader(srcTrie.Hash()) + if err != nil { + t.Fatalf("failed to create reader for trie %x: %v", srcTrie.Hash(), err) + } + data, err := reader.Node(owner, inner, element.hash) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) } - results[i] = SyncResult{hash, data} + results[i] = NodeSyncResult{element.path, data} } // Process each of the trie nodes for _, result := range results { - if err := sched.Process(result); err != nil { + if err := sched.ProcessNode(result); err != nil { t.Fatalf("failed to process result %v", err) } } @@ -398,56 +574,86 @@ func TestIncompleteSync(t *testing.T) { t.Fatalf("failed to commit data: %v", err) } batch.Write() + for _, result := range results { - added = append(added, result.Hash) - // Check that all known sub-tries in the synced trie are complete - if err := checkTrieConsistency(triedb, result.Hash); err != nil { - t.Fatalf("trie inconsistent: %v", err) + hash := crypto.Keccak256Hash(result.Data) + if hash != root { + addedKeys = append(addedKeys, result.Path) + addedHashes = append(addedHashes, crypto.Keccak256Hash(result.Data)) } } // Fetch the next batch to retrieve - nodes, _, codes = sched.Missing(1) - queue = append(append(queue[:0], nodes...), codes...) + paths, nodes, _ = sched.Missing(1) + elements = elements[:0] + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + } } // Sanity check that removing any node from the database is detected - for _, node := range added[1:] { - key := node.Bytes() - value, _ := diskdb.Get(key) - - diskdb.Delete(key) - if err := checkTrieConsistency(triedb, added[0]); err == nil { - t.Fatalf("trie inconsistency not caught, missing: %x", key) - } - diskdb.Put(key, value) + for i, path := range addedKeys { + owner, inner := ResolvePath([]byte(path)) + nodeHash := addedHashes[i] + value := rawdb.ReadTrieNode(diskdb, owner, inner, nodeHash, scheme) + rawdb.DeleteTrieNode(diskdb, owner, inner, nodeHash, scheme) + if err := checkTrieConsistency(diskdb, srcDb.Scheme(), root, false); err == nil { + t.Fatalf("trie inconsistency not caught, missing: %x", path) + } + rawdb.WriteTrieNode(diskdb, owner, inner, nodeHash, value, scheme) } } // Tests that trie nodes get scheduled lexicographically when having the same // depth. func TestSyncOrdering(t *testing.T) { + testSyncOrdering(t, rawdb.HashScheme) + testSyncOrdering(t, rawdb.PathScheme) +} + +func testSyncOrdering(t *testing.T, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler, tracking the requests - diskdb := memorydb.New() - triedb := NewDatabase(diskdb) - sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb)) + diskdb := rawdb.NewMemoryDatabase() + sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb), srcDb.Scheme()) - nodes, paths, _ := sched.Missing(1) - queue := append([]common.Hash{}, nodes...) - reqs := append([]SyncPath{}, paths...) + // The code requests are ignored here since there is no code + // at the testing trie. + var ( + reqs []SyncPath + elements []trieElement + ) + paths, nodes, _ := sched.Missing(1) + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + reqs = append(reqs, NewSyncPath([]byte(paths[i]))) + } - for len(queue) > 0 { - results := make([]SyncResult, len(queue)) - for i, hash := range queue { - data, err := srcDb.Node(hash) + for len(elements) > 0 { + results := make([]NodeSyncResult, len(elements)) + for i, element := range elements { + owner, inner := ResolvePath([]byte(element.path)) + reader, err := srcDb.Reader(srcTrie.Hash()) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to create reader for trie %x: %v", srcTrie.Hash(), err) } - results[i] = SyncResult{hash, data} + + data, err := reader.Node(owner, inner, element.hash) + if err != nil { + t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) + } + results[i] = NodeSyncResult{element.path, data} } for _, result := range results { - if err := sched.Process(result); err != nil { + if err := sched.ProcessNode(result); err != nil { t.Fatalf("failed to process result %v", err) } } @@ -457,12 +663,19 @@ func TestSyncOrdering(t *testing.T) { } batch.Write() - nodes, paths, _ = sched.Missing(1) - queue = append(queue[:0], nodes...) - reqs = append(reqs, paths...) + paths, nodes, _ = sched.Missing(1) + elements = elements[:0] + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + reqs = append(reqs, NewSyncPath([]byte(paths[i]))) + } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) // Check that the trie nodes have been requested path-ordered for i := 0; i < len(reqs)-1; i++ { @@ -476,3 +689,332 @@ func TestSyncOrdering(t *testing.T) { } } } +func syncWith(t *testing.T, root common.Hash, db ethdb.Database, srcDb *Database) { + syncWithHookWriter(t, root, db, srcDb, nil) +} + +func syncWithHookWriter(t *testing.T, root common.Hash, db ethdb.Database, srcDb *Database, hookWriter ethdb.KeyValueWriter) { + // Create a destination trie and sync with the scheduler + sched := NewSync(root, db, nil, NewSyncBloom(1, db), srcDb.Scheme()) + + // The code requests are ignored here since there is no code + // at the testing trie. + paths, nodes, _ := sched.Missing(0) + var elements []trieElement + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + } + for len(elements) > 0 { + results := make([]NodeSyncResult, len(elements)) + for i, element := range elements { + owner, inner := ResolvePath([]byte(element.path)) + reader, err := srcDb.Reader(root) + if err != nil { + t.Fatalf("failed to create reader for trie %x: %v", root, err) + } + data, err := reader.Node(owner, inner, element.hash) + if err != nil { + t.Fatalf("failed to retrieve node data for hash %x: %v", element.hash, err) + } + results[i] = NodeSyncResult{element.path, data} + } + for index, result := range results { + if err := sched.ProcessNode(result); err != nil { + t.Fatalf("failed to process result[%d][%v] data %v %v", index, []byte(result.Path), result.Data, err) + } + } + batch := db.NewBatch() + if err := sched.Commit(batch); err != nil { + t.Fatalf("failed to commit data: %v", err) + } + if hookWriter != nil { + batch.Replay(hookWriter) + } else { + batch.Write() + } + paths, nodes, _ = sched.Missing(0) + elements = elements[:0] + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + } + } +} + +// Tests that the syncing target is keeping moving which may overwrite the stale +// states synced in the last cycle. +func TestSyncMovingTarget(t *testing.T) { + testSyncMovingTarget(t, rawdb.HashScheme) + // testSyncMovingTarget(t, rawdb.PathScheme) +} + +func testSyncMovingTarget(t *testing.T, scheme string) { + // Create a random trie to copy + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) + + // Create a destination trie and sync with the scheduler + diskdb := rawdb.NewMemoryDatabase() + syncWith(t, srcTrie.Hash(), diskdb, srcDb) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) + + // Push more modifications into the src trie, to see if dest trie can still + // sync with it(overwrite stale states) + var ( + preRoot = srcTrie.Hash() + diff = make(map[string][]byte) + ) + for i := byte(0); i < 10; i++ { + key, val := randBytes(32), randBytes(32) + srcTrie.Update(key, val) + diff[string(key)] = val + } + root, nodes, _ := srcTrie.Commit(false) + if err := srcDb.Update(root, preRoot, 0, trienode.NewWithNodeSet(nodes), nil); err != nil { + panic(err) + } + if err := srcDb.Commit(root, false); err != nil { + panic(err) + } + preRoot = root + srcTrie, _ = NewSecure(TrieID(root), srcDb) + + syncWith(t, srcTrie.Hash(), diskdb, srcDb) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), diff, false) + + // Revert added modifications from the src trie, to see if dest trie can still + // sync with it(overwrite reverted states) + var reverted = make(map[string][]byte) + for k := range diff { + srcTrie.Delete([]byte(k)) + reverted[k] = nil + } + for k := range srcData { + val := randBytes(32) + srcTrie.Update([]byte(k), val) + reverted[k] = val + } + root, nodes, _ = srcTrie.Commit(false) + if err := srcDb.Update(root, preRoot, 0, trienode.NewWithNodeSet(nodes), nil); err != nil { + panic(err) + } + if err := srcDb.Commit(root, false); err != nil { + panic(err) + } + srcTrie, _ = NewSecure(TrieID(root), srcDb) + + syncWith(t, srcTrie.Hash(), diskdb, srcDb) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), reverted, false) +} + +// Tests if state syncer can correctly catch up the pivot move. +func TestPivotMove(t *testing.T) { + testPivotMove(t, rawdb.HashScheme, true) + testPivotMove(t, rawdb.HashScheme, false) + testPivotMove(t, rawdb.PathScheme, true) + testPivotMove(t, rawdb.PathScheme, false) +} + +func testPivotMove(t *testing.T, scheme string, tiny bool) { + var ( + srcDisk = rawdb.NewMemoryDatabase() + srcTrieDB = newTestDatabase(srcDisk, scheme) + srcTrie, _ = New(TrieID(types.EmptyRootHash), srcTrieDB) + + deleteFn = func(key []byte, tr *Trie, states map[string][]byte) { + tr.Delete(key) + delete(states, string(key)) + } + writeFn = func(key []byte, val []byte, tr *Trie, states map[string][]byte) { + if val == nil { + if tiny { + val = randBytes(4) + } else { + val = randBytes(32) + } + } + tr.Update(key, val) + states[string(key)] = common.CopyBytes(val) + } + copyStates = func(states map[string][]byte) map[string][]byte { + cpy := make(map[string][]byte) + for k, v := range states { + cpy[k] = v + } + return cpy + } + ) + stateA := make(map[string][]byte) + writeFn([]byte{0x01, 0x23}, nil, srcTrie, stateA) + writeFn([]byte{0x01, 0x24}, nil, srcTrie, stateA) + writeFn([]byte{0x12, 0x33}, nil, srcTrie, stateA) + writeFn([]byte{0x12, 0x34}, nil, srcTrie, stateA) + writeFn([]byte{0x02, 0x34}, nil, srcTrie, stateA) + writeFn([]byte{0x13, 0x44}, nil, srcTrie, stateA) + + rootA, nodesA, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootA, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodesA), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootA, false); err != nil { + panic(err) + } + // Create a destination trie and sync with the scheduler + destDisk := rawdb.NewMemoryDatabase() + syncWith(t, rootA, destDisk, srcTrieDB) + checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateA, true) + + // Delete element to collapse trie + stateB := copyStates(stateA) + srcTrie, _ = New(TrieID(rootA), srcTrieDB) + deleteFn([]byte{0x02, 0x34}, srcTrie, stateB) + deleteFn([]byte{0x13, 0x44}, srcTrie, stateB) + writeFn([]byte{0x01, 0x24}, nil, srcTrie, stateB) + + rootB, nodesB, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootB, rootA, 0, trienode.NewWithNodeSet(nodesB), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootB, false); err != nil { + panic(err) + } + syncWith(t, rootB, destDisk, srcTrieDB) + checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateB, true) + + // Add elements to expand trie + stateC := copyStates(stateB) + srcTrie, _ = New(TrieID(rootB), srcTrieDB) + + writeFn([]byte{0x01, 0x24}, stateA[string([]byte{0x01, 0x24})], srcTrie, stateC) + writeFn([]byte{0x02, 0x34}, nil, srcTrie, stateC) + writeFn([]byte{0x13, 0x44}, nil, srcTrie, stateC) + + rootC, nodesC, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootC, rootB, 0, trienode.NewWithNodeSet(nodesC), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootC, false); err != nil { + panic(err) + } + syncWith(t, rootC, destDisk, srcTrieDB) + checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateC, true) +} + +func TestSyncAbort(t *testing.T) { + testSyncAbort(t, rawdb.PathScheme) + testSyncAbort(t, rawdb.HashScheme) +} + +type hookWriter struct { + db ethdb.KeyValueStore + filter func(key []byte, value []byte) bool +} + +// Put inserts the given value into the key-value data store. +func (w *hookWriter) Put(key []byte, value []byte) error { + if w.filter != nil && w.filter(key, value) { + return nil + } + return w.db.Put(key, value) +} + +// Delete removes the key from the key-value data store. +func (w *hookWriter) Delete(key []byte) error { + return w.db.Delete(key) +} + +func testSyncAbort(t *testing.T, scheme string) { + var ( + srcDisk = rawdb.NewMemoryDatabase() + srcTrieDB = newTestDatabase(srcDisk, scheme) + srcTrie, _ = New(TrieID(types.EmptyRootHash), srcTrieDB) + + deleteFn = func(key []byte, tr *Trie, states map[string][]byte) { + tr.Delete(key) + delete(states, string(key)) + } + writeFn = func(key []byte, val []byte, tr *Trie, states map[string][]byte) { + if val == nil { + val = randBytes(32) + } + tr.Update(key, val) + states[string(key)] = common.CopyBytes(val) + } + copyStates = func(states map[string][]byte) map[string][]byte { + cpy := make(map[string][]byte) + for k, v := range states { + cpy[k] = v + } + return cpy + } + ) + var ( + stateA = make(map[string][]byte) + key = randBytes(32) + val = randBytes(32) + ) + for i := 0; i < 256; i++ { + writeFn(randBytes(32), nil, srcTrie, stateA) + } + writeFn(key, val, srcTrie, stateA) + + rootA, nodesA, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootA, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodesA), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootA, false); err != nil { + panic(err) + } + // Create a destination trie and sync with the scheduler + destDisk := rawdb.NewMemoryDatabase() + syncWith(t, rootA, destDisk, srcTrieDB) + checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateA, true) + + // Delete the element from the trie + stateB := copyStates(stateA) + srcTrie, _ = New(TrieID(rootA), srcTrieDB) + deleteFn(key, srcTrie, stateB) + + rootB, nodesB, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootB, rootA, 0, trienode.NewWithNodeSet(nodesB), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootB, false); err != nil { + panic(err) + } + + // Sync the new state, but never persist the new root node. Before the + // fix #28595, the original old root node will still be left in database + // which breaks the next healing cycle. + syncWithHookWriter(t, rootB, destDisk, srcTrieDB, &hookWriter{db: destDisk, filter: func(key []byte, value []byte) bool { + if scheme == rawdb.HashScheme { + return false + } + if len(value) == 0 { + return false + } + ok, path := rawdb.ResolveAccountTrieNodeKey(key) + return ok && len(path) == 0 + }}) + + // Add elements to expand trie + stateC := copyStates(stateB) + srcTrie, _ = New(TrieID(rootB), srcTrieDB) + + writeFn(key, val, srcTrie, stateC) + rootC, nodesC, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootC, rootB, 0, trienode.NewWithNodeSet(nodesC), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootC, false); err != nil { + panic(err) + } + syncWith(t, rootC, destDisk, srcTrieDB) + checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateC, true) +} diff --git a/trie/testutil/utils.go b/trie/testutil/utils.go new file mode 100644 index 0000000000..a75d0431b0 --- /dev/null +++ b/trie/testutil/utils.go @@ -0,0 +1,61 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package testutil + +import ( + crand "crypto/rand" + "encoding/binary" + mrand "math/rand" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +// Prng is a pseudo random number generator seeded by strong randomness. +// The randomness is printed on startup in order to make failures reproducible. +var prng = initRand() + +func initRand() *mrand.Rand { + var seed [8]byte + crand.Read(seed[:]) + rnd := mrand.New(mrand.NewSource(int64(binary.LittleEndian.Uint64(seed[:])))) + return rnd +} + +// RandBytes generates a random byte slice with specified length. +func RandBytes(n int) []byte { + r := make([]byte, n) + prng.Read(r) + return r +} + +// RandomHash generates a random blob of data and returns it as a hash. +func RandomHash() common.Hash { + return common.BytesToHash(RandBytes(common.HashLength)) +} + +// RandomAddress generates a random blob of data and returns it as an address. +func RandomAddress() common.Address { + return common.BytesToAddress(RandBytes(common.AddressLength)) +} + +// RandomNode generates a random node. +func RandomNode() *trienode.Node { + val := RandBytes(100) + return trienode.New(crypto.Keccak256Hash(val), val) +} diff --git a/trie/tracer.go b/trie/tracer.go new file mode 100644 index 0000000000..7ccd49f6b1 --- /dev/null +++ b/trie/tracer.go @@ -0,0 +1,127 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +// tracer tracks the changes of trie nodes. During the trie operations, +// some nodes can be deleted from the trie, while these deleted nodes +// won't be captured by trie.Hasher or trie.Commiter. Thus, these deleted +// nodes won't be removed from the disk at all. Tracer is an auxiliary tool +// used to track all insert and delete operations on trie and capture all +// deleted nodes eventually. +// +// The changed nodes can be mainly divided into two categories: the leaf +// nodes and intermediate nodes. The fromer is inserted/deleted by callers +// white the latter is iserted/deleted in order to follow the rule of trie. +// This tool can track all of them no matter is embedded in its +// parent or nit, but the valueNode is never tracked. +// +// Besides, it's also used for recording the original value of the nodes +// when they are resolved from the disk. The pre-value of the nodes will +// be used to construct reverse-diffs in the future. +// +// Note tracer is not thread-safe, callers should be responsible for handling +// the concurrency issues by themselves. +type tracer struct { + inserts map[string]struct{} + deletes map[string]struct{} + accessList map[string][]byte +} + +// newTracer initlializes tride node diff tracer. +func newTracer() *tracer { + return &tracer{ + inserts: make(map[string]struct{}), + deletes: make(map[string]struct{}), + accessList: make(map[string][]byte), + } +} + +// onRead tracks the newly loaded trie node and caches the rlp-encoded blob internally. +// Don't change the value outside of function since it's not deep-copied. +func (t *tracer) onRead(path []byte, val []byte) { + t.accessList[string(path)] = val +} + +// onInsert tracks the newly inserted trie node. If it's already +// in the delete set(resurrected node), then just wipe it from +// the deletion set as it's untouched. +func (t *tracer) onInsert(path []byte) { + // If the path is in the delete set, then it's a resurrected node, then wipe it. + if _, present := t.deletes[string(path)]; present { + delete(t.deletes, string(path)) + return + } + t.inserts[string(path)] = struct{}{} +} + +// OnDelete tracks the newly deleted trie node. If it's already +// in the addition set, then just wipe it from the addtion set +// as it's untouched. +func (t *tracer) onDelete(path []byte) { + if _, present := t.inserts[string(path)]; present { + delete(t.inserts, string(path)) + return + } + t.deletes[string(path)] = struct{}{} +} + +// reset clears the content tracked by tracer. +func (t *tracer) reset() { + t.inserts = make(map[string]struct{}) + t.deletes = make(map[string]struct{}) + t.accessList = make(map[string][]byte) +} + +// copy returns a deep copied tracer instance. +func (t *tracer) copy() *tracer { + var ( + inserts = make(map[string]struct{}) + deletes = make(map[string]struct{}) + accessList = make(map[string][]byte) + ) + for key := range t.inserts { + inserts[key] = struct{}{} + } + for key := range t.deletes { + deletes[key] = struct{}{} + } + for key, val := range t.accessList { + accessList[key] = val + } + return &tracer{ + inserts: inserts, + deletes: deletes, + accessList: accessList, + } +} + +// deletedNodes returns a list of node paths which are deleted from the trie. +func (t *tracer) deletedNodes() []string { + var paths []string + for path := range t.deletes { + // It's possible a few deleted nodes were embedded + // in their parent before, the deletions can be no + // effect by deleting nothing, filter them out. + _, ok := t.accessList[path] + + if !ok { + continue + } + paths = append(paths, path) + } + return paths +} diff --git a/trie/tracer_test.go b/trie/tracer_test.go new file mode 100644 index 0000000000..ca11e2ea92 --- /dev/null +++ b/trie/tracer_test.go @@ -0,0 +1,378 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +var ( + tiny = []struct{ k, v string }{ + {"k1", "v1"}, + {"k2", "v2"}, + {"k3", "v3"}, + } + nonAligned = []struct{ k, v string }{ + {"do", "verb"}, + {"ether", "wookiedoo"}, + {"horse", "stallion"}, + {"shaman", "horse"}, + {"doge", "coin"}, + {"dog", "puppy"}, + {"somethingveryoddindeedthis is", "myothernodedata"}, + } + standard = []struct{ k, v string }{ + {string(randBytes(32)), "verb"}, + {string(randBytes(32)), "wookiedoo"}, + {string(randBytes(32)), "stallion"}, + {string(randBytes(32)), "horse"}, + {string(randBytes(32)), "coin"}, + {string(randBytes(32)), "puppy"}, + {string(randBytes(32)), "myothernodedata"}, + } +) + +func TestTrieTracer(t *testing.T) { + testTrieTracer(t, tiny) + testTrieTracer(t, nonAligned) + testTrieTracer(t, standard) +} + +// Tests if the trie diffs are tracked correctly. Tracer should capture +// all non-leaf dirty nodes, no matter the node is embedded or not. +func testTrieTracer(t *testing.T, vals []struct{ k, v string }) { + db := NewDatabase(rawdb.NewMemoryDatabase(), nil) + trie := NewEmpty(db) + + // Determine all new nodes are tracked + for _, val := range vals { + trie.Update([]byte(val.k), []byte(val.v)) + } + insertSet := copySet(trie.tracer.inserts) // copy before commit + deleteSet := copySet(trie.tracer.deletes) // copy before commit + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + + seen := setKeys(iterNodes(db, root)) + if !compareSet(insertSet, seen) { + t.Fatal("Unexpected insertion set") + } + if !compareSet(deleteSet, nil) { + t.Fatal("Unexpected deletion set") + } + + // Determine all deletions are tracked + trie, _ = New(TrieID(root), db) + for _, val := range vals { + trie.Delete([]byte(val.k)) + } + insertSet, deleteSet = copySet(trie.tracer.inserts), copySet(trie.tracer.deletes) + if !compareSet(insertSet, nil) { + t.Fatal("Unexpected insertion set") + } + if !compareSet(deleteSet, seen) { + t.Fatal("Unexpected deletion set") + } +} + +// Test that after inserting a new batch of nodes and deleting them immediately, +// the trie tracer should be cleared normally as no operation happened. +func TestTrieTracerNoop(t *testing.T) { + testTrieTracerNoop(t, tiny) + testTrieTracerNoop(t, nonAligned) + testTrieTracerNoop(t, standard) +} + +func testTrieTracerNoop(t *testing.T, vals []struct{ k, v string }) { + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) + for _, val := range vals { + trie.Update([]byte(val.k), []byte(val.v)) + } + for _, val := range vals { + trie.Delete([]byte(val.k)) + } + if len(trie.tracer.inserts) != 0 { + t.Fatal("Unexpected insertion set") + } + if len(trie.tracer.deletes) != 0 { + t.Fatal("Unexpected deletion set") + } +} + +// Tests if the accessList is correctly tracked. +func TestAccessList(t *testing.T) { + testAccessList(t, tiny) + testAccessList(t, nonAligned) + testAccessList(t, standard) +} + +func testAccessList(t *testing.T, vals []struct{ k, v string }) { + var ( + db = NewDatabase(rawdb.NewMemoryDatabase(), nil) + trie = NewEmpty(db) + orig = trie.Copy() + ) + // Create trie from scratch + for _, val := range vals { + trie.Update([]byte(val.k), []byte(val.v)) + } + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, nodes); err != nil { + t.Fatalf("Invalid accessList %v", err) + } + + // Update trie + parent := root + trie, _ = New(TrieID(root), db) + orig = trie.Copy() + for _, val := range vals { + trie.Update([]byte(val.k), randBytes(32)) + } + root, nodes, _ = trie.Commit(false) + db.Update(root, parent, 0, trienode.NewWithNodeSet(nodes), nil) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, nodes); err != nil { + t.Fatalf("Invalid accessList %v", err) + } + + // Add more new nodes + parent = root + trie, _ = New(TrieID(root), db) + orig = trie.Copy() + var keys []string + for i := 0; i < 30; i++ { + key := randBytes(32) + keys = append(keys, string(key)) + trie.Update(key, randBytes(32)) + } + root, nodes, _ = trie.Commit(false) + db.Update(root, parent, 0, trienode.NewWithNodeSet(nodes), nil) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, nodes); err != nil { + t.Fatalf("Invalid accessList %v", err) + } + + // Partial deletions + parent = root + trie, _ = New(TrieID(root), db) + orig = trie.Copy() + for _, key := range keys { + trie.Update([]byte(key), nil) + } + root, nodes, _ = trie.Commit(false) + db.Update(root, parent, 0, trienode.NewWithNodeSet(nodes), nil) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, nodes); err != nil { + t.Fatalf("Invalid accessList %v", err) + } + + // Delete all + parent = root + trie, _ = New(TrieID(root), db) + orig = trie.Copy() + for _, val := range vals { + trie.Update([]byte(val.k), nil) + } + root, nodes, _ = trie.Commit(false) + db.Update(root, parent, 0, trienode.NewWithNodeSet(nodes), nil) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, nodes); err != nil { + t.Fatalf("Invalid accessList %v", err) + } +} + +// Tests origin values won't be tracked in Iterator or Prover +func TestAccessListLeak(t *testing.T) { + var ( + db = NewDatabase(rawdb.NewMemoryDatabase(), nil) + trie = NewEmpty(db) + ) + // Create trie from scratch + for _, val := range standard { + trie.Update([]byte(val.k), []byte(val.v)) + } + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + + var cases = []struct { + op func(tr *Trie) + }{ + { + func(tr *Trie) { + it := tr.MustNodeIterator(nil) + for it.Next(true) { + } + }, + }, + { + func(tr *Trie) { + it := NewIterator(tr.MustNodeIterator(nil)) + for it.Next() { + } + }, + }, + { + func(tr *Trie) { + for _, val := range standard { + tr.Prove([]byte(val.k), 0, rawdb.NewMemoryDatabase()) + } + }, + }, + } + for _, c := range cases { + trie, _ = New(TrieID(root), db) + n1 := len(trie.tracer.accessList) + c.op(trie) + n2 := len(trie.tracer.accessList) + + if n1 != n2 { + t.Fatalf("AccessList is leaked, prev %d after %d", n1, n2) + } + } +} + +// Tests whether the original tree node is correctly deleted after being embedded +// in its parent due to the smaller size of the original tree node. +func TestTinyTree(t *testing.T) { + var ( + db = NewDatabase(rawdb.NewMemoryDatabase(), nil) + trie = NewEmpty(db) + ) + for _, val := range tiny { + trie.Update([]byte(val.k), randBytes(32)) + } + root, set, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(set), nil) + + parent := root + trie, _ = New(TrieID(root), db) + orig := trie.Copy() + for _, val := range tiny { + trie.Update([]byte(val.k), []byte(val.v)) + } + root, set, _ = trie.Commit(false) + db.Update(root, parent, 0, trienode.NewWithNodeSet(set), nil) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, set); err != nil { + t.Fatalf("Invalid accessList %v", err) + } +} + +func compareSet(setA, setB map[string]struct{}) bool { + if len(setA) != len(setB) { + return false + } + for key := range setA { + if _, ok := setB[key]; !ok { + return false + } + } + return true +} + +func forNodes(tr *Trie) map[string][]byte { + var ( + it = tr.MustNodeIterator(nil) + nodes = make(map[string][]byte) + ) + for it.Next(true) { + if it.Leaf() { + continue + } + blob := it.NodeBlob() + nodes[string(it.Path())] = common.CopyBytes(blob) + } + return nodes +} + +func iterNodes(db *Database, root common.Hash) map[string][]byte { + tr, _ := New(TrieID(root), db) + return forNodes(tr) +} + +func forHashedNodes(tr *Trie) map[string][]byte { + var ( + it = tr.MustNodeIterator(nil) + nodes = make(map[string][]byte) + ) + for it.Next(true) { + if it.Hash() == (common.Hash{}) { + continue + } + blob := it.NodeBlob() + nodes[string(it.Path())] = common.CopyBytes(blob) + } + return nodes +} + +// diffTries return the diff and shared nodes between 2 tries +func diffTries(trieA, trieB *Trie) (map[string][]byte, map[string][]byte, map[string][]byte) { + var ( + nodesA = forHashedNodes(trieA) + nodesB = forHashedNodes(trieB) + inA = make(map[string][]byte) // hashed nodes in trie a but not b + inB = make(map[string][]byte) // hashed nodes in trie b but not a + both = make(map[string][]byte) // hashed nodes in both tries but different value + ) + for path, blobA := range nodesA { + if blobB, ok := nodesB[path]; ok { + if bytes.Equal(blobA, blobB) { + continue + } + both[path] = blobA + continue + } + inA[path] = blobA + } + for path, blobB := range nodesB { + if _, ok := nodesA[path]; ok { + continue + } + inB[path] = blobB + } + return inA, inB, both +} + +func setKeys(set map[string][]byte) map[string]struct{} { + keys := make(map[string]struct{}) + for k := range set { + keys[k] = struct{}{} + } + return keys +} + +func copySet(set map[string]struct{}) map[string]struct{} { + copied := make(map[string]struct{}) + for k := range set { + copied[k] = struct{}{} + } + return copied +} diff --git a/trie/trie.go b/trie/trie.go index 13343112b8..05d136fb1b 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -21,13 +21,13 @@ import ( "bytes" "errors" "fmt" - "sync" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" ) var ( @@ -38,34 +38,30 @@ var ( emptyState = crypto.Keccak256Hash(nil) ) -// LeafCallback is a callback type invoked when a trie operation reaches a leaf -// node. -// -// The paths is a path tuple identifying a particular trie node either in a single -// trie (account) or a layered trie (account -> storage). Each path in the tuple -// is in the raw format(32 bytes). -// -// The hexpath is a composite hexary path identifying the trie node. All the key -// bytes are converted to the hexary nibbles and composited with the parent path -// if the trie node is in a layered trie. -// -// It's used by state sync and commit to allow handling external references -// between account and storage tries. And also it's used in the state healing -// for extracting the raw states(leaf nodes) with corresponding paths. -type LeafCallback func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error - -// Trie is a Merkle Patricia Trie. -// The zero value is an empty trie with no database. -// Use New to create a trie that sits on top of a database. -// -// Trie is not safe for concurrent use. +// Trie is a Merkle Patricia Trie. Use New to create a trie that sits on +// top of Database. Whenever tries performance a commit operation, the generated nodes will be +// gathered and returned in a set. Once a trie is committed, it's node usable anymore. Callers have to +// re-create the trie with new root based on the updated trie database. type Trie struct { - db *Database - root node + owner common.Hash + root node + + // Flag whether the commit operation is already performed. If so the + // trie is not usable(latest states is invisible). + committed bool + // Keep track of the number leafs which have been inserted since the last // hashing operation. This number will not directly map to the number of // actually unhashed nodes unhashed int + + // db is the handler trie can retrieve nodes from. It's + // only for reading purpose and not available for writing. + reader *trieReader + // tracer is the tool to track the trie changes. + // It will be reset after each commit operation. + + tracer *tracer } // newFlag returns the cache flag value for a newly created node. @@ -73,21 +69,24 @@ func (t *Trie) newFlag() nodeFlag { return nodeFlag{dirty: true} } -// New creates a trie with an existing root node from db. -// -// If root is the zero hash or the sha3 hash of an empty string, the -// trie is initially empty and does not require a database. Otherwise, -// New will panic if db is nil and returns a MissingNodeError if root does -// not exist in the database. Accessing the trie loads nodes from db on demand. -func New(root common.Hash, db *Database) (*Trie, error) { - if db == nil { - panic("trie.New called without a database") +// New creates the trie instance with provided trie id and the read-only +// database. The state specified by trie id must be available, otherwise +// an error will be returned. The trie root specified by trie id can be +// zero hash or the sha3 hash of an empty string, then trie is initially +// empty, otherwise, the root node must be present in database or returns +// a MissingNodeError if not. +func New(id *ID, db *Database) (*Trie, error) { + reader, err := newTrieReader(id.StateRoot, id.Owner, db) + if err != nil { + return nil, err } trie := &Trie{ - db: db, + owner: id.Owner, + reader: reader, + tracer: newTracer(), } - if root != (common.Hash{}) && root != emptyRoot { - rootnode, err := trie.resolveHash(root[:], nil) + if id.Root != (common.Hash{}) && id.Root != emptyRoot { + rootnode, err := trie.resolveAndTrack(id.Root[:], nil) if err != nil { return nil, err } @@ -96,10 +95,30 @@ func New(root common.Hash, db *Database) (*Trie, error) { return trie, nil } +// NewEmpty is a shortcut to create empty tree. It's mostly used in tests. +func NewEmpty(db *Database) *Trie { + tr, _ := New(TrieID(common.Hash{}), db) + return tr +} + +// MustNodeIterator is a wrapper of NodeIterator and will omit any encountered +// error but just print out an error message. +func (t *Trie) MustNodeIterator(start []byte) NodeIterator { + it, err := t.NodeIterator(start) + if err != nil { + log.Error("Unhandled trie error in Trie.NodeIterator", "err", err) + } + return it +} + // NodeIterator returns an iterator that returns nodes of the trie. Iteration starts at // the key after the given start key. -func (t *Trie) NodeIterator(start []byte) NodeIterator { - return newNodeIterator(t, start) +func (t *Trie) NodeIterator(start []byte) (NodeIterator, error) { + // Short circuit if the trie is already committed and not usable. + if t.committed { + return nil, ErrCommitted + } + return newNodeIterator(t, start), nil } // Get returns the value for key stored in the trie. @@ -116,6 +135,10 @@ func (t *Trie) Get(key []byte) []byte { // The value bytes must not be modified by the caller. // If a node was not found in the database, a MissingNodeError is returned. func (t *Trie) TryGet(key []byte) ([]byte, error) { + // Short circuit if the trie is already committed and not usable. + if t.committed { + return nil, ErrCommitted + } value, newroot, didResolve, err := t.tryGet(t.root, keybytesToHex(key), 0) if err == nil && didResolve { t.root = newroot @@ -148,7 +171,7 @@ func (t *Trie) tryGet(origNode node, key []byte, pos int) (value []byte, newnode } return value, n, didResolve, err case hashNode: - child, err := t.resolveHash(n, key[:pos]) + child, err := t.resolveAndTrack(n, key[:pos]) if err != nil { return nil, n, true, err } @@ -162,6 +185,10 @@ func (t *Trie) tryGet(origNode node, key []byte, pos int) (value []byte, newnode // TryGetNode attempts to retrieve a trie node by compact-encoded path. It is not // possible to use keybyte-encoding as the path might contain odd nibbles. func (t *Trie) TryGetNode(path []byte) ([]byte, int, error) { + // Short circuit if the trie is already committed and not usable. + if t.committed { + return nil, 0, ErrCommitted + } item, newroot, resolved, err := t.tryGetNode(t.root, compactToHex(path), 0) if err != nil { return nil, resolved, err @@ -194,7 +221,8 @@ func (t *Trie) tryGetNode(origNode node, path []byte, pos int) (item []byte, new if hash == nil { return nil, origNode, 0, errors.New("non-consensus node") } - blob, err := t.db.Node(common.BytesToHash(hash)) + blob, err := t.reader.node(path, common.BytesToHash(hash)) + return blob, origNode, 1, err } // Path still needs to be traversed, descend into children @@ -224,7 +252,7 @@ func (t *Trie) tryGetNode(origNode node, path []byte, pos int) (item []byte, new return item, n, resolved, err case hashNode: - child, err := t.resolveHash(n, path[:pos]) + child, err := t.resolveAndTrack(n, path[:pos]) if err != nil { return nil, n, 1, err } @@ -265,6 +293,10 @@ func (t *Trie) TryUpdateAccount(key []byte, acc *types.StateAccount) error { // // If a node was not found in the database, a MissingNodeError is returned. func (t *Trie) TryUpdate(key, value []byte) error { + // Short circuit if the trie is already committed and not usable. + if t.committed { + return ErrCommitted + } t.unhashed++ k := keybytesToHex(key) if len(value) != 0 { @@ -317,6 +349,11 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error if matchlen == 0 { return true, branch, nil } + + // New branch node is created as a child of the original short node. + // Track the newly inserted node in the tracer. The node identifier + // passed is the path from the root node. + t.tracer.onInsert(append(prefix, key[:matchlen]...)) // Otherwise, replace it with a short node leading up to the branch. return true, &shortNode{key[:matchlen], branch, t.newFlag()}, nil @@ -331,13 +368,17 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error return true, n, nil case nil: + // New short node is created and track it in the tracer. The node identifier + // passed is the path from the root node. Note the valueNode won't be tracked + // since it's always embedded in its parent. + t.tracer.onInsert(prefix) return true, &shortNode{key, value, t.newFlag()}, nil case hashNode: // We've hit a part of the trie that isn't loaded yet. Load // the node and insert into it. This leaves all child nodes on // the path to the value in the trie. - rn, err := t.resolveHash(n, prefix) + rn, err := t.resolveAndTrack(n, prefix) if err != nil { return false, nil, err } @@ -362,6 +403,10 @@ func (t *Trie) Delete(key []byte) { // TryDelete removes any existing value for key from the trie. // If a node was not found in the database, a MissingNodeError is returned. func (t *Trie) TryDelete(key []byte) error { + // Short circuit if the trie is already committed and not usable. + if t.committed { + return ErrCommitted + } t.unhashed++ k := keybytesToHex(key) _, n, err := t.delete(t.root, nil, k) @@ -372,6 +417,33 @@ func (t *Trie) TryDelete(key []byte) error { return nil } +// traverse method mostly for learning and testing purposes. +func (t *Trie) traverse(n node, prefix []byte) { + switch n := n.(type) { + case *shortNode: + // If it's a short node, print the prefix and key + newPrefix := append(prefix, n.Key...) + fmt.Printf("[Traverse] Short Node: %+v\n", n) + t.traverse(n.Val, newPrefix) + case *fullNode: + // If it's a full node, print the prefix and each child + + for i, _ := range n.Children { + if n.Children[i] != nil { + fmt.Printf("[Traverse] Full Node: %+v\n", n.Children[i]) + newPrefix := append(prefix, byte(i)) + t.traverse(n.Children[i], newPrefix) + } + } + case valueNode: + // If it's a value node, print the prefix and value + fmt.Printf("Value Node: %s -> %s\n", string(prefix), string(n)) + case hashNode: + // If it's a hash node, resolve it and traverse the result + fmt.Printf("Hash Node: %s -> %s \n", string(prefix), string(n)) + } +} + // delete returns the new root of the trie with key deleted. // It reduces the trie to minimal form by simplifying // nodes on the way up after deleting recursively. @@ -383,6 +455,10 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { return false, n, nil // don't replace n on mismatch } if matchlen == len(key) { + // It means that matched short node is deleted entirely, and track + // it in the deletion set. The same the valueNode doesn't need + // to be tracked at all since it's always be embedded in its parent. + t.tracer.onDelete(prefix) return true, nil, nil // remove n entirely for whole matches } // The key is longer than n.Key. Remove the remaining suffix @@ -395,6 +471,10 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { } switch child := child.(type) { case *shortNode: + // The child shortNode is merged into its parent, track + // is deleted as well. + t.tracer.onDelete(append(prefix, n.Key...)) + // Deleting from the subtrie reduced it to another // short node. Merge the nodes to avoid creating a // shortNode{..., shortNode{...}}. Use concat (which @@ -451,11 +531,16 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { // shortNode{..., shortNode{...}}. Since the entry // might not be loaded yet, resolve it just for this // check. - cnode, err := t.resolve(n.Children[pos], prefix) + cnode, err := t.resolve(n.Children[pos], append(prefix, byte(pos))) // Prefix mostly for tracking path. if err != nil { return false, nil, err } if cnode, ok := cnode.(*shortNode); ok { + // Replace the entire full node with the short node. + // Mark the original short nodes as delete since the value + // is embedded in its parent now. + t.tracer.onDelete(append(prefix, byte(pos))) + k := append([]byte{byte(pos)}, cnode.Key...) return true, &shortNode{k, cnode.Val, t.newFlag()}, nil } @@ -477,7 +562,7 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { // We've hit a part of the trie that isn't loaded yet. Load // the node and delete from it. This leaves all child nodes on // the path to the value in the trie. - rn, err := t.resolveHash(n, prefix) + rn, err := t.resolveAndTrack(n, prefix) if err != nil { return false, nil, err } @@ -501,89 +586,115 @@ func concat(s1 []byte, s2 ...byte) []byte { func (t *Trie) resolve(n node, prefix []byte) (node, error) { if n, ok := n.(hashNode); ok { - return t.resolveHash(n, prefix) + return t.resolveAndTrack(n, prefix) } return n, nil } -func (t *Trie) resolveHash(n hashNode, prefix []byte) (node, error) { - hash := common.BytesToHash(n) - if node := t.db.node(hash); node != nil { - return node, nil +// resolveAndTrack loads node from the underlying store with the given node hash +// and path prefix and also tracks the loaded node blob in tracer treated as the +// node's original value. The rlp-encoded blob is preferred to be loaded from +// database because it's easy to decode node while complex to encode node to blob. +func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) { + blob, err := t.reader.node(prefix, common.BytesToHash(n)) + if err != nil { + return nil, err } - return nil, &MissingNodeError{NodeHash: hash, Path: prefix} + t.tracer.onRead(prefix, blob) + return mustDecodeNode(n, blob), nil } // Hash returns the root hash of the trie. It does not write to the // database and can be used even if the trie doesn't have one. func (t *Trie) Hash() common.Hash { - hash, cached, _ := t.hashRoot() + hash, cached := t.hashRoot() t.root = cached return common.BytesToHash(hash.(hashNode)) } -// Commit writes all nodes to the trie's memory database, tracking the internal -// and external (for account tries) references. -func (t *Trie) Commit(onleaf LeafCallback) (common.Hash, int, error) { - if t.db == nil { - panic("commit called on trie with nil database") - } +// Commit collects all dirty nodes in the trie and replace them with the +// corresponding node hash. All collected nodes(including dirty leaves if +// collectLeaf is true) will be encapsulated into a nodeset for return. +// The returned nodeset can be nil if the trie is clean(nothing to commit). +// Once the trie is committed, it's not usable anymore. A new trie must +// be created with new root and updated trie database for following usage +func (t *Trie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet, error) { + defer t.tracer.reset() + + defer func() { + t.committed = true + }() + // (a) The trie was empty and no update happens => return nil + // (b) The trie was non-empty and all nodes are dropped => return + // the node set includes all deleted nodes if t.root == nil { - return emptyRoot, 0, nil + paths := t.tracer.deletedNodes() + if len(paths) == 0 { + return types.EmptyRootHash, nil, nil // case (a) + } + nodes := trienode.NewNodeSet(t.owner) + for _, path := range paths { + nodes.AddNode([]byte(path), trienode.NewDeleted()) + } + return types.EmptyRootHash, nodes, nil // case (b) } // Derive the hash for all dirty nodes first. We hold the assumption // in the following procedure that all nodes are hashed. rootHash := t.Hash() - h := newCommitter() - defer returnCommitterToPool(h) // Do a quick check if we really need to commit, before we spin // up goroutines. This can happen e.g. if we load a trie for reading storage // values, but don't write to it. - if _, dirty := t.root.cache(); !dirty { - return rootHash, 0, nil - } - var wg sync.WaitGroup - if onleaf != nil { - h.onleaf = onleaf - h.leafCh = make(chan *leaf, leafChanSize) - wg.Add(1) - go func() { - defer wg.Done() - h.commitLoop(t.db) - }() - } - newRoot, committed, err := h.Commit(t.root, t.db) - if onleaf != nil { - // The leafch is created in newCommitter if there was an onleaf callback - // provided. The commitLoop only _reads_ from it, and the commit - // operation was the sole writer. Therefore, it's safe to close this - // channel here. - close(h.leafCh) - wg.Wait() + if hashedNode, dirty := t.root.cache(); !dirty { + // Replace the root node with the origin hash in order to + // ensure all resolved nodes are dropped after the commit. + t.root = hashedNode + return rootHash, nil, nil } + nodes := trienode.NewNodeSet(t.owner) + for _, path := range t.tracer.deletedNodes() { + nodes.AddNode([]byte(path), trienode.NewDeleted()) + } + h := newCommitter(nodes, t.tracer, collectLeaf) + newRoot, nodes, err := h.Commit(t.root) if err != nil { - return common.Hash{}, 0, err + return common.Hash{}, nil, err } + t.root = newRoot - return rootHash, committed, nil + return rootHash, nodes, nil } // hashRoot calculates the root hash of the given trie -func (t *Trie) hashRoot() (node, node, error) { +func (t *Trie) hashRoot() (node, node) { if t.root == nil { - return hashNode(emptyRoot.Bytes()), nil, nil + return hashNode(emptyRoot.Bytes()), nil } // If the number of changes is below 100, we let one thread handle it h := newHasher(t.unhashed >= 100) defer returnHasherToPool(h) hashed, cached := h.hash(t.root, true) t.unhashed = 0 - return hashed, cached, nil + return hashed, cached } // Reset drops the referenced root node and cleans all internal state. func (t *Trie) Reset() { t.root = nil + t.owner = common.Hash{} t.unhashed = 0 + t.tracer.reset() + t.committed = false +} + +// Copy returns a copy of Trie. +func (t *Trie) Copy() *Trie { + return &Trie{ + root: t.root, + owner: t.owner, + committed: t.committed, + unhashed: t.unhashed, + reader: t.reader, + tracer: t.tracer.copy(), + } } diff --git a/trie/trie_id.go b/trie/trie_id.go new file mode 100644 index 0000000000..8ab490ca3b --- /dev/null +++ b/trie/trie_id.go @@ -0,0 +1,55 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package trie + +import "github.com/ethereum/go-ethereum/common" + +// ID is the identifier for uniquely identifying a trie. +type ID struct { + StateRoot common.Hash // The root of the corresponding state(block.root) + Owner common.Hash // The contract address hash which the trie belongs to + Root common.Hash // The root hash of trie +} + +// StateTrieID constructs an identifier for state trie with the provided state root. +func StateTrieID(root common.Hash) *ID { + return &ID{ + StateRoot: root, + Owner: common.Hash{}, + Root: root, + } +} + +// StorageTrieID constructs an identifier for storage trie which belongs to a certain +// state and contract specified by the stateRoot and owner. +func StorageTrieID(stateRoot common.Hash, owner common.Hash, root common.Hash) *ID { + return &ID{ + StateRoot: stateRoot, + Owner: owner, + Root: root, + } +} + +// TrieID constructs an identifier for a standard trie(not a second-layer trie) +// with provided root. It's mostly used in tests and some other tries like CHT trie. +func TrieID(root common.Hash) *ID { + return &ID{ + StateRoot: root, + Owner: common.Hash{}, + Root: root, + } +} diff --git a/trie/trie_reader.go b/trie/trie_reader.go new file mode 100644 index 0000000000..4c17a438ec --- /dev/null +++ b/trie/trie_reader.go @@ -0,0 +1,107 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/triestate" +) + +// Reader wraps the Node and NodeBlob method of a backing trie store. +type Reader interface { + // Node retrieves the trie node blob with the provided trie identifier, node path and + // the corresponding node hash. No error will be returned if the node is not found. + + // When looking up nodes in the account trie, 'owner' is the zero hash. For contract + // storage trie nodes, 'owner' is the hash of the account address that containing the + // storage. + Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) +} + +// NodeReader wraps all the necessary functions for accessing trie node. +type NodeReader interface { + // Reader returns a reader for accessing all trie nodes with provided + // state root. Nil is returned in case the state is not available. + Reader(root common.Hash) (Reader, error) +} + +// trieReader is a wrapper of the underlying node reader. It's not safe +// for concurrent usage. +type trieReader struct { + owner common.Hash + reader Reader + banned map[string]struct{} // Marker to prevent node from being accessed, for tests +} + +// newTrieReader initializes the trie reader with the given node reader. +func newTrieReader(stateRoot, owner common.Hash, db NodeReader) (*trieReader, error) { + if stateRoot == (common.Hash{}) || stateRoot == types.EmptyRootHash { + if stateRoot == (common.Hash{}) { + log.Error("zero state root") + } + return &trieReader{owner: owner}, nil + } + + reader, err := db.Reader(stateRoot) + if err != nil { + return nil, &MissingNodeError{Owner: owner, NodeHash: stateRoot, err: err} + } + return &trieReader{owner: owner, reader: reader}, nil +} + +// newEmptyReader initializes the pure in-memory reader. All read operations +// should be forbidden and returns the MissingNodeError. +func newEmptyReader() *trieReader { + return &trieReader{} +} + +// node retrieves the rlp-encoded trie node with the provided trie node +// information. An MissingNodeError will be returned in case the node is +// not found or any error is encountered. +func (r *trieReader) node(path []byte, hash common.Hash) ([]byte, error) { + // Perform the logics in tests for preventing trie node access. + if r.banned != nil { + if _, ok := r.banned[string(path)]; ok { + return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path} + } + } + if r.reader == nil { + return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path} + } + blob, err := r.reader.Node(r.owner, path, hash) + if err != nil || len(blob) == 0 { + return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path, err: err} + } + return blob, nil +} + +// trieLoader implements triestate.TrieLoader for constructing tries. +type trieLoader struct { + db *Database +} + +// OpenTrie opens the main account trie. +func (l *trieLoader) OpenTrie(root common.Hash) (triestate.Trie, error) { + return New(TrieID(root), l.db) +} + +// OpenStorageTrie opens the storage trie of an account. +func (l *trieLoader) OpenStorageTrie(stateRoot common.Hash, addrHash, root common.Hash) (triestate.Trie, error) { + return New(StorageTrieID(stateRoot, addrHash, root), l.db) +} diff --git a/trie/trie_test.go b/trie/trie_test.go index 806a8cc634..ff087b6ef9 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -25,19 +25,19 @@ import ( "io/ioutil" "math/big" "math/rand" - "os" "reflect" "testing" "testing/quick" "github.com/davecgh/go-spew/spew" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb/leveldb" - "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" "golang.org/x/crypto/sha3" ) @@ -48,12 +48,12 @@ func init() { // Used for testing func newEmpty() *Trie { - trie, _ := New(common.Hash{}, NewDatabase(memorydb.New())) + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) return trie } func TestEmptyTrie(t *testing.T) { - var trie Trie + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) res := trie.Hash() exp := emptyRoot if res != exp { @@ -62,7 +62,7 @@ func TestEmptyTrie(t *testing.T) { } func TestNull(t *testing.T) { - var trie Trie + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) key := make([]byte, 32) value := []byte("test") trie.Update(key, value) @@ -72,7 +72,13 @@ func TestNull(t *testing.T) { } func TestMissingRoot(t *testing.T) { - trie, err := New(common.HexToHash("0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"), NewDatabase(memorydb.New())) + testMissingRoot(t, rawdb.HashScheme) + testMissingRoot(t, rawdb.PathScheme) +} + +func testMissingRoot(t *testing.T, scheme string) { + root := common.HexToHash("0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33") + trie, err := New(TrieID(root), newTestDatabase(rawdb.NewMemoryDatabase(), scheme)) if trie != nil { t.Error("New returned non-nil trie for invalid root") } @@ -81,75 +87,85 @@ func TestMissingRoot(t *testing.T) { } } -func TestMissingNodeDisk(t *testing.T) { testMissingNode(t, false) } -func TestMissingNodeMemonly(t *testing.T) { testMissingNode(t, true) } +func TestMissingNode(t *testing.T) { + testMissingNode(t, false, rawdb.HashScheme) + testMissingNode(t, false, rawdb.PathScheme) + testMissingNode(t, true, rawdb.HashScheme) + testMissingNode(t, true, rawdb.PathScheme) +} -func testMissingNode(t *testing.T, memonly bool) { - diskdb := memorydb.New() - triedb := NewDatabase(diskdb) +func testMissingNode(t *testing.T, memonly bool, scheme string) { + diskdb := rawdb.NewMemoryDatabase() + triedb := newTestDatabase(diskdb, scheme) - trie, _ := New(common.Hash{}, triedb) + trie := NewEmpty(triedb) updateString(trie, "120000", "qwerqwerqwerqwerqwerqwerqwerqwer") updateString(trie, "123456", "asdfasdfasdfasdfasdfasdfasdfasdf") - root, _, _ := trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + triedb.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) if !memonly { - triedb.Commit(root, true, nil) + triedb.Commit(root, true) } - trie, _ = New(root, triedb) + trie, _ = New(TrieID(root), triedb) _, err := trie.TryGet([]byte("120000")) if err != nil { t.Errorf("Unexpected error: %v", err) } - trie, _ = New(root, triedb) + trie, _ = New(TrieID(root), triedb) _, err = trie.TryGet([]byte("120099")) if err != nil { t.Errorf("Unexpected error: %v", err) } - trie, _ = New(root, triedb) + trie, _ = New(TrieID(root), triedb) _, err = trie.TryGet([]byte("123456")) if err != nil { t.Errorf("Unexpected error: %v", err) } - trie, _ = New(root, triedb) + trie, _ = New(TrieID(root), triedb) err = trie.TryUpdate([]byte("120099"), []byte("zxcvzxcvzxcvzxcvzxcvzxcvzxcvzxcv")) if err != nil { t.Errorf("Unexpected error: %v", err) } - trie, _ = New(root, triedb) + trie, _ = New(TrieID(root), triedb) err = trie.TryDelete([]byte("123456")) if err != nil { t.Errorf("Unexpected error: %v", err) } - hash := common.HexToHash("0xe1d943cc8f061a0c0b98162830b970395ac9315654824bf21b73b891365262f9") + var ( + path []byte + hash = common.HexToHash("0xe1d943cc8f061a0c0b98162830b970395ac9315654824bf21b73b891365262f9") + ) + for p, n := range nodes.Nodes { + if n.Hash == hash { + path = common.CopyBytes([]byte(p)) + break + } + } + trie, _ = New(TrieID(root), triedb) if memonly { - delete(triedb.dirties, hash) + trie.reader.banned = map[string]struct{}{string(path): {}} } else { - diskdb.Delete(hash[:]) + rawdb.DeleteTrieNode(diskdb, common.Hash{}, path, hash, scheme) } - trie, _ = New(root, triedb) _, err = trie.TryGet([]byte("120000")) if _, ok := err.(*MissingNodeError); !ok { t.Errorf("Wrong error: %v", err) } - trie, _ = New(root, triedb) _, err = trie.TryGet([]byte("120099")) if _, ok := err.(*MissingNodeError); !ok { t.Errorf("Wrong error: %v", err) } - trie, _ = New(root, triedb) _, err = trie.TryGet([]byte("123456")) if err != nil { t.Errorf("Unexpected error: %v", err) } - trie, _ = New(root, triedb) err = trie.TryUpdate([]byte("120099"), []byte("zxcv")) if _, ok := err.(*MissingNodeError); !ok { t.Errorf("Wrong error: %v", err) } - trie, _ = New(root, triedb) err = trie.TryDelete([]byte("123456")) if _, ok := err.(*MissingNodeError); !ok { t.Errorf("Wrong error: %v", err) @@ -157,7 +173,7 @@ func testMissingNode(t *testing.T, memonly bool) { } func TestInsert(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) updateString(trie, "doe", "reindeer") updateString(trie, "dog", "puppy") @@ -173,7 +189,7 @@ func TestInsert(t *testing.T) { updateString(trie, "A", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") exp = common.HexToHash("d23786fb4a010da3ce639d66d5e904a11dbc02746d1ce25029e53290cabf28ab") - root, _, err := trie.Commit(nil) + root, _, err := trie.Commit(false) if err != nil { t.Fatalf("commit error: %v", err) } @@ -183,7 +199,8 @@ func TestInsert(t *testing.T) { } func TestGet(t *testing.T) { - trie := newEmpty() + db := NewDatabase(rawdb.NewMemoryDatabase(), nil) + trie := NewEmpty(db) updateString(trie, "doe", "reindeer") updateString(trie, "dog", "puppy") updateString(trie, "dogglesworth", "cat") @@ -202,12 +219,14 @@ func TestGet(t *testing.T) { if i == 1 { return } - trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + trie, _ = New(TrieID(root), db) } } func TestDelete(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) vals := []struct{ k, v string }{ {"do", "verb"}, {"ether", "wookiedoo"}, @@ -234,7 +253,7 @@ func TestDelete(t *testing.T) { } func TestEmptyValues(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) vals := []struct{ k, v string }{ {"do", "verb"}, @@ -258,7 +277,8 @@ func TestEmptyValues(t *testing.T) { } func TestReplication(t *testing.T) { - trie := newEmpty() + db := NewDatabase(rawdb.NewMemoryDatabase(), nil) + trie := NewEmpty(db) vals := []struct{ k, v string }{ {"do", "verb"}, {"ether", "wookiedoo"}, @@ -271,27 +291,36 @@ func TestReplication(t *testing.T) { for _, val := range vals { updateString(trie, val.k, val.v) } - exp, _, err := trie.Commit(nil) + root, nodes, err := trie.Commit(false) if err != nil { t.Fatalf("commit error: %v", err) } + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) // create a new trie on top of the database and check that lookups work. - trie2, err := New(exp, trie.db) + trie2, err := New(TrieID(root), db) if err != nil { - t.Fatalf("can't recreate trie at %x: %v", exp, err) + t.Fatalf("can't recreate trie at %x: %v", root, err) } for _, kv := range vals { if string(getString(trie2, kv.k)) != kv.v { t.Errorf("trie2 doesn't have %q => %q", kv.k, kv.v) } } - hash, _, err := trie2.Commit(nil) + hash, nodes, err := trie2.Commit(false) if err != nil { t.Fatalf("commit error: %v", err) } - if hash != exp { - t.Errorf("root failure. expected %x got %x", exp, hash) + if hash != root { + t.Errorf("root failure. expected %x got %x", root, hash) + } + // recreate the trie after commit + if nodes != nil { + db.Update(hash, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + } + trie2, err = New(TrieID(hash), db) + if err != nil { + t.Fatalf("can't recreate trie at %x: %v", root, err) } // perform some insertions on the new trie. @@ -309,13 +338,13 @@ func TestReplication(t *testing.T) { for _, val := range vals2 { updateString(trie2, val.k, val.v) } - if hash := trie2.Hash(); hash != exp { - t.Errorf("root failure. expected %x got %x", exp, hash) + if trie2.Hash() != hash { + t.Errorf("root failure. expected %x got %x", hash, hash) } } func TestLargeValue(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) trie.Update([]byte("key1"), []byte{99, 99, 99, 99}) trie.Update([]byte("key2"), bytes.Repeat([]byte{1}, 32)) trie.Hash() @@ -372,8 +401,8 @@ const ( opGet opCommit opHash - opReset opItercheckhash + opProve opMax // boundary value, not an actual op ) @@ -399,7 +428,7 @@ func (randTest) Generate(r *rand.Rand, size int) reflect.Value { step.key = genKey() step.value = make([]byte, 8) binary.BigEndian.PutUint64(step.value, uint64(i)) - case opGet, opDelete: + case opGet, opDelete, opProve: step.key = genKey() } steps = append(steps, step) @@ -407,16 +436,69 @@ func (randTest) Generate(r *rand.Rand, size int) reflect.Value { return reflect.ValueOf(steps) } -func runRandTest(rt randTest) bool { - triedb := NewDatabase(memorydb.New()) +// verifyAccessList verifies the access list of the new trie against the old trie. +func verifyAccessList(old *Trie, new *Trie, set *trienode.NodeSet) error { + deletes, inserts, updates := diffTries(old, new) + + // Check insertion set + for path := range inserts { + n, ok := set.Nodes[path] + if !ok || n.IsDeleted() { + return errors.New("expect new node") + } + // if len(n.Prev) > 0 { + // return errors.New("unexpected origin value") + // } + } + // Check deletion set + for path, _ := range deletes { + n, ok := set.Nodes[path] + if !ok || !n.IsDeleted() { + return errors.New("expect deleted node") + } + // if len(n.Prev) == 0 { + // return errors.New("expect origin value") + // } + // if !bytes.Equal(n.Prev, blob) { + // return errors.New("invalid origin value") + // } + } + // Check update set + for path, _ := range updates { + n, ok := set.Nodes[path] + if !ok || n.IsDeleted() { + return errors.New("expect updated node") + } + // if len(n.Prev) == 0 { + // return errors.New("expect origin value") + // } + // if !bytes.Equal(n.Prev, blob) { + // return errors.New("invalid origin value") + // } + } + return nil +} - tr, _ := New(common.Hash{}, triedb) - values := make(map[string]string) // tracks content of the trie +func runRandTest(rt randTest) bool { + var scheme = rawdb.HashScheme + if rand.Intn(2) == 0 { + scheme = rawdb.PathScheme + } + var ( + origin = types.EmptyRootHash + triedb = newTestDatabase(rawdb.NewMemoryDatabase(), scheme) + tr = NewEmpty(triedb) + origTrie = NewEmpty(triedb) + values = make(map[string]string) // tracks content of the trie + ) + tr.tracer = newTracer() for i, step := range rt { - fmt.Printf("{op: %d, key: common.Hex2Bytes(\"%x\"), value: common.Hex2Bytes(\"%x\")}, // step %d\n", - step.op, step.key, step.value, i) + // fmt.Printf("{op: %d, key: common.Hex2Bytes(\"%x\"), value: common.Hex2Bytes(\"%x\")}, // step %d\n", + // step.op, step.key, step.value, i) + switch step.op { + case opUpdate: tr.Update(step.key, step.value) values[string(step.key)] = string(step.value) @@ -429,25 +511,53 @@ func runRandTest(rt randTest) bool { if string(v) != want { rt[i].err = fmt.Errorf("mismatch for key 0x%x, got 0x%x want 0x%x", step.key, v, want) } - case opCommit: - _, _, rt[i].err = tr.Commit(nil) + case opProve: + hash := tr.Hash() + if hash == emptyRoot { + continue + } + proofDb := rawdb.NewMemoryDatabase() + err := tr.Prove(step.key, 0, proofDb) + if err != nil { + rt[i].err = fmt.Errorf("failed for proving key %#x, %v", step.key, err) + } + _, err = VerifyProof(hash, step.key, proofDb) + if err != nil { + rt[i].err = fmt.Errorf("failed for verifying key %#x, %v", step.key, err) + } case opHash: tr.Hash() - case opReset: - hash, _, err := tr.Commit(nil) + case opCommit: + root, nodes, err := tr.Commit(true) if err != nil { rt[i].err = err return false } - newtr, err := New(hash, triedb) + if nodes != nil { + triedb.Update(root, origin, 0, trienode.NewWithNodeSet(nodes), nil) + } + newtr, err := New(TrieID(root), triedb) if err != nil { rt[i].err = err return false } + if nodes != nil { + if err := verifyAccessList(origTrie, newtr, nodes); err != nil { + rt[i].err = err + return false + } + } tr = newtr + // Enable node tracing. Resolve the root node again explicitly + // since it's not captured at the beginning. + tr.tracer = newTracer() + tr.resolveAndTrack(root.Bytes(), nil) + origTrie = tr.Copy() + origin = root + case opItercheckhash: - checktr, _ := New(common.Hash{}, triedb) - it := NewIterator(tr.NodeIterator(nil)) + checktr := NewEmpty(triedb) + it := NewIterator(tr.MustNodeIterator(nil)) for it.Next() { checktr.Update(it.Key, it.Value) } @@ -480,10 +590,11 @@ func BenchmarkUpdateLE(b *testing.B) { benchUpdate(b, binary.LittleEndian) } const benchElemCount = 20000 func benchGet(b *testing.B, commit bool) { - trie := new(Trie) + triedb := NewDatabase(rawdb.NewMemoryDatabase(), nil) + trie := NewEmpty(triedb) if commit { _, tmpdb := tempDB() - trie, _ = New(common.Hash{}, tmpdb) + trie = NewEmpty(tmpdb) } k := make([]byte, 32) for i := 0; i < benchElemCount; i++ { @@ -491,21 +602,12 @@ func benchGet(b *testing.B, commit bool) { trie.Update(k, k) } binary.LittleEndian.PutUint64(k, benchElemCount/2) - if commit { - trie.Commit(nil) - } b.ResetTimer() for i := 0; i < b.N; i++ { trie.Get(k) } b.StopTimer() - - if commit { - ldb := trie.db.diskdb.(*leveldb.Database) - ldb.Close() - os.RemoveAll(ldb.Path()) - } } func benchUpdate(b *testing.B, e binary.ByteOrder) *Trie { @@ -560,22 +662,17 @@ func BenchmarkHash(b *testing.B) { // insert into the trie before measuring the hashing. func BenchmarkCommitAfterHash(b *testing.B) { b.Run("no-onleaf", func(b *testing.B) { - benchmarkCommitAfterHash(b, nil) + benchmarkCommitAfterHash(b, false) }) - var a types.StateAccount - onleaf := func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error { - rlp.DecodeBytes(leaf, &a) - return nil - } b.Run("with-onleaf", func(b *testing.B) { - benchmarkCommitAfterHash(b, onleaf) + benchmarkCommitAfterHash(b, true) }) } -func benchmarkCommitAfterHash(b *testing.B, onleaf LeafCallback) { +func benchmarkCommitAfterHash(b *testing.B, collectLeaf bool) { // Make the random benchmark deterministic addresses, accounts := makeAccounts(b.N) - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) for i := 0; i < len(addresses); i++ { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) } @@ -583,7 +680,7 @@ func benchmarkCommitAfterHash(b *testing.B, onleaf LeafCallback) { trie.Hash() b.ResetTimer() b.ReportAllocs() - trie.Commit(onleaf) + trie.Commit(collectLeaf) } func TestTinyTrie(t *testing.T) { @@ -602,8 +699,8 @@ func TestTinyTrie(t *testing.T) { if exp, root := common.HexToHash("0608c1d1dc3905fa22204c7a0e43644831c3b6d3def0f274be623a948197e64a"), trie.Hash(); exp != root { t.Errorf("3: got %x, exp %x", root, exp) } - checktr, _ := New(common.Hash{}, trie.db) - it := NewIterator(trie.NodeIterator(nil)) + checktr := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) + it := NewIterator(trie.MustNodeIterator(nil)) for it.Next() { checktr.Update(it.Key, it.Value) } @@ -615,19 +712,19 @@ func TestTinyTrie(t *testing.T) { func TestCommitAfterHash(t *testing.T) { // Create a realistic account trie to hash addresses, accounts := makeAccounts(1000) - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase(), nil)) for i := 0; i < len(addresses); i++ { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) } // Insert the accounts into the trie and hash it trie.Hash() - trie.Commit(nil) + trie.Commit(false) root := trie.Hash() exp := common.HexToHash("72f9d3f3fe1e1dd7b8936442e7642aef76371472d94319900790053c493f3fe6") if exp != root { t.Errorf("got %x, exp %x", root, exp) } - root, _, _ = trie.Commit(nil) + root, _, _ = trie.Commit(false) if exp != root { t.Errorf("got %x, exp %x", root, exp) } @@ -681,11 +778,17 @@ func (s *spongeDb) Stat(property string) (string, error) { panic("implement func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") } func (s *spongeDb) Close() error { return nil } func (s *spongeDb) Put(key []byte, value []byte) error { - valbrief := value + var ( + keybrief = key + valbrief = value + ) + if len(keybrief) > 8 { + keybrief = keybrief[:8] + } if len(valbrief) > 8 { valbrief = valbrief[:8] } - s.journal = append(s.journal, fmt.Sprintf("%v: PUT([%x...], [%d bytes] %x...)\n", s.id, key[:8], len(value), valbrief)) + s.journal = append(s.journal, fmt.Sprintf("%v: PUT([%x...], [%d bytes] %x...)\n", s.id, keybrief, len(value), valbrief)) s.sponge.Write(key) s.sponge.Write(value) return nil @@ -713,41 +816,30 @@ func (b *spongeBatch) Replay(w ethdb.KeyValueWriter) error { return nil } // to check whether changes to the trie modifies the write order or data in any way. func TestCommitSequence(t *testing.T) { for i, tc := range []struct { - count int - expWriteSeqHash []byte - expCallbackSeqHash []byte + count int + expWriteSeqHash []byte }{ - {20, common.FromHex("873c78df73d60e59d4a2bcf3716e8bfe14554549fea2fc147cb54129382a8066"), - common.FromHex("ff00f91ac05df53b82d7f178d77ada54fd0dca64526f537034a5dbe41b17df2a")}, - {200, common.FromHex("ba03d891bb15408c940eea5ee3d54d419595102648d02774a0268d892add9c8e"), - common.FromHex("f3cd509064c8d319bbdd1c68f511850a902ad275e6ed5bea11547e23d492a926")}, - {2000, common.FromHex("f7a184f20df01c94f09537401d11e68d97ad0c00115233107f51b9c287ce60c7"), - common.FromHex("ff795ea898ba1e4cfed4a33b4cf5535a347a02cf931f88d88719faf810f9a1c9")}, + {20, common.FromHex("873c78df73d60e59d4a2bcf3716e8bfe14554549fea2fc147cb54129382a8066")}, + {200, common.FromHex("ba03d891bb15408c940eea5ee3d54d419595102648d02774a0268d892add9c8e")}, + {2000, common.FromHex("f7a184f20df01c94f09537401d11e68d97ad0c00115233107f51b9c287ce60c7")}, } { addresses, accounts := makeAccounts(tc.count) // This spongeDb is used to check the sequence of disk-db-writes s := &spongeDb{sponge: sha3.NewLegacyKeccak256()} - db := NewDatabase(s) - trie, _ := New(common.Hash{}, db) - // Another sponge is used to check the callback-sequence - callbackSponge := sha3.NewLegacyKeccak256() + db := NewDatabase(rawdb.NewDatabase(s), nil) + trie := NewEmpty(db) // Fill the trie with elements for i := 0; i < tc.count; i++ { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) } // Flush trie -> database - root, _, _ := trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) // Flush memdb -> disk (sponge) - db.Commit(root, false, func(c common.Hash) { - // And spongify the callback-order - callbackSponge.Write(c[:]) - }) + db.Commit(root, false) if got, exp := s.sponge.Sum(nil), tc.expWriteSeqHash; !bytes.Equal(got, exp) { t.Errorf("test %d, disk write sequence wrong:\ngot %x exp %x\n", i, got, exp) } - if got, exp := callbackSponge.Sum(nil), tc.expCallbackSeqHash; !bytes.Equal(got, exp) { - t.Errorf("test %d, call back sequence wrong:\ngot: %x exp %x\n", i, got, exp) - } } } @@ -755,24 +847,18 @@ func TestCommitSequence(t *testing.T) { // but uses random blobs instead of 'accounts' func TestCommitSequenceRandomBlobs(t *testing.T) { for i, tc := range []struct { - count int - expWriteSeqHash []byte - expCallbackSeqHash []byte + count int + expWriteSeqHash []byte }{ - {20, common.FromHex("8e4a01548551d139fa9e833ebc4e66fc1ba40a4b9b7259d80db32cff7b64ebbc"), - common.FromHex("450238d73bc36dc6cc6f926987e5428535e64be403877c4560e238a52749ba24")}, - {200, common.FromHex("6869b4e7b95f3097a19ddb30ff735f922b915314047e041614df06958fc50554"), - common.FromHex("0ace0b03d6cb8c0b82f6289ef5b1a1838306b455a62dafc63cada8e2924f2550")}, - {2000, common.FromHex("444200e6f4e2df49f77752f629a96ccf7445d4698c164f962bbd85a0526ef424"), - common.FromHex("117d30dafaa62a1eed498c3dfd70982b377ba2b46dd3e725ed6120c80829e518")}, + {20, common.FromHex("8e4a01548551d139fa9e833ebc4e66fc1ba40a4b9b7259d80db32cff7b64ebbc")}, + {200, common.FromHex("6869b4e7b95f3097a19ddb30ff735f922b915314047e041614df06958fc50554")}, + {2000, common.FromHex("444200e6f4e2df49f77752f629a96ccf7445d4698c164f962bbd85a0526ef424")}, } { prng := rand.New(rand.NewSource(int64(i))) // This spongeDb is used to check the sequence of disk-db-writes s := &spongeDb{sponge: sha3.NewLegacyKeccak256()} - db := NewDatabase(s) - trie, _ := New(common.Hash{}, db) - // Another sponge is used to check the callback-sequence - callbackSponge := sha3.NewLegacyKeccak256() + db := NewDatabase(rawdb.NewDatabase(s), nil) + trie := NewEmpty(db) // Fill the trie with elements for i := 0; i < tc.count; i++ { key := make([]byte, 32) @@ -788,18 +874,13 @@ func TestCommitSequenceRandomBlobs(t *testing.T) { trie.Update(key, val) } // Flush trie -> database - root, _, _ := trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) // Flush memdb -> disk (sponge) - db.Commit(root, false, func(c common.Hash) { - // And spongify the callback-order - callbackSponge.Write(c[:]) - }) + db.Commit(root, false) if got, exp := s.sponge.Sum(nil), tc.expWriteSeqHash; !bytes.Equal(got, exp) { t.Fatalf("test %d, disk write sequence wrong:\ngot %x exp %x\n", i, got, exp) } - if got, exp := callbackSponge.Sum(nil), tc.expCallbackSeqHash; !bytes.Equal(got, exp) { - t.Fatalf("test %d, call back sequence wrong:\ngot: %x exp %x\n", i, got, exp) - } } } @@ -808,13 +889,17 @@ func TestCommitSequenceStackTrie(t *testing.T) { prng := rand.New(rand.NewSource(int64(count))) // This spongeDb is used to check the sequence of disk-db-writes s := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "a"} - db := NewDatabase(s) - trie, _ := New(common.Hash{}, db) + db := NewDatabase(rawdb.NewDatabase(s), nil) + trie := NewEmpty(db) // Another sponge is used for the stacktrie commits stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"} - stTrie := NewStackTrie(stackTrieSponge) - // Fill the trie with elements - for i := 1; i < count; i++ { + options := NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme()) + }) + stTrie := NewStackTrie(options) + // Fill the trie with elements, should start 0, otherwise nodes will be nil in the first time. + for i := 0; i < count; i++ { // For the stack trie, we need to do inserts in proper order key := make([]byte, 32) binary.BigEndian.PutUint64(key, uint64(i)) @@ -830,14 +915,12 @@ func TestCommitSequenceStackTrie(t *testing.T) { stTrie.TryUpdate(key, val) } // Flush trie -> database - root, _, _ := trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) // Flush memdb -> disk (sponge) - db.Commit(root, false, nil) + db.Commit(root, false) // And flush stacktrie -> disk - stRoot, err := stTrie.Commit() - if err != nil { - t.Fatalf("Failed to commit stack trie %v", err) - } + stRoot := stTrie.Commit() if stRoot != root { t.Fatalf("root wrong, got %x exp %x", stRoot, root) } @@ -864,29 +947,32 @@ func TestCommitSequenceStackTrie(t *testing.T) { // not fit into 32 bytes, rlp-encoded. However, it's still the correct thing to do. func TestCommitSequenceSmallRoot(t *testing.T) { s := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "a"} - db := NewDatabase(s) - trie, _ := New(common.Hash{}, db) + db := NewDatabase(rawdb.NewDatabase(s), nil) + trie := NewEmpty(db) // Another sponge is used for the stacktrie commits stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"} - stTrie := NewStackTrie(stackTrieSponge) + options := NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme()) + }) + stTrie := NewStackTrie(options) // Add a single small-element to the trie(s) key := make([]byte, 5) key[0] = 1 trie.TryUpdate(key, []byte{0x1}) stTrie.TryUpdate(key, []byte{0x1}) // Flush trie -> database - root, _, _ := trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) // Flush memdb -> disk (sponge) - db.Commit(root, false, nil) + db.Commit(root, false) // And flush stacktrie -> disk - stRoot, err := stTrie.Commit() - if err != nil { - t.Fatalf("Failed to commit stack trie %v", err) - } + stRoot := stTrie.Commit() if stRoot != root { t.Fatalf("root wrong, got %x exp %x", stRoot, root) } - fmt.Printf("root: %x\n", stRoot) + + t.Logf("root: %x\n", stRoot) if got, exp := stackTrieSponge.sponge.Sum(nil), s.sponge.Sum(nil); !bytes.Equal(got, exp) { t.Fatalf("test, disk write sequence wrong:\ngot %x exp %x\n", got, exp) } @@ -995,7 +1081,7 @@ func benchmarkCommitAfterHashFixedSize(b *testing.B, addresses [][20]byte, accou // Insert the accounts into the trie and hash it trie.Hash() b.StartTimer() - trie.Commit(nil) + trie.Commit(false) b.StopTimer() } @@ -1040,14 +1126,16 @@ func BenchmarkDerefRootFixedSize(b *testing.B) { func benchmarkDerefRootFixedSize(b *testing.B, addresses [][20]byte, accounts [][]byte) { b.ReportAllocs() - trie := newEmpty() + triedb := NewDatabase(rawdb.NewMemoryDatabase(), nil) + trie := NewEmpty(triedb) for i := 0; i < len(addresses); i++ { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) } h := trie.Hash() - trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + triedb.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) b.StartTimer() - trie.db.Dereference(h) + triedb.Dereference(h) b.StopTimer() } @@ -1060,7 +1148,7 @@ func tempDB() (string, *Database) { if err != nil { panic(fmt.Sprintf("can't create temporary database: %v", err)) } - return dir, NewDatabase(diskdb) + return dir, NewDatabase(rawdb.NewDatabase(diskdb), nil) } func getString(trie *Trie, k string) []byte { diff --git a/trie/triedb/hashdb/database.go b/trie/triedb/hashdb/database.go new file mode 100644 index 0000000000..a86095916a --- /dev/null +++ b/trie/triedb/hashdb/database.go @@ -0,0 +1,666 @@ +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package hashdb + +import ( + "errors" + "fmt" + "reflect" + "sync" + "time" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" +) + +var ( + memcacheCleanHitMeter = metrics.NewRegisteredMeter("hashdb/memcache/clean/hit", nil) + memcacheCleanMissMeter = metrics.NewRegisteredMeter("hashdb/memcache/clean/miss", nil) + memcacheCleanReadMeter = metrics.NewRegisteredMeter("hashdb/memcache/clean/read", nil) + memcacheCleanWriteMeter = metrics.NewRegisteredMeter("hashdb/memcache/clean/write", nil) + + memcacheDirtyHitMeter = metrics.NewRegisteredMeter("hashdb/memcache/dirty/hit", nil) + memcacheDirtyMissMeter = metrics.NewRegisteredMeter("hashdb/memcache/dirty/miss", nil) + memcacheDirtyReadMeter = metrics.NewRegisteredMeter("hashdb/memcache/dirty/read", nil) + memcacheDirtyWriteMeter = metrics.NewRegisteredMeter("hashdb/memcache/dirty/write", nil) + + memcacheFlushTimeTimer = metrics.NewRegisteredResettingTimer("hashdb/memcache/flush/time", nil) + memcacheFlushNodesMeter = metrics.NewRegisteredMeter("hashdb/memcache/flush/nodes", nil) + memcacheFlushBytesMeter = metrics.NewRegisteredMeter("hashdb/memcache/flush/bytes", nil) + + memcacheGCTimeTimer = metrics.NewRegisteredResettingTimer("hashdb/memcache/gc/time", nil) + memcacheGCNodesMeter = metrics.NewRegisteredMeter("hashdb/memcache/gc/nodes", nil) + memcacheGCBytesMeter = metrics.NewRegisteredMeter("hashdb/memcache/gc/bytes", nil) + + memcacheCommitTimeTimer = metrics.NewRegisteredResettingTimer("hashdb/memcache/commit/time", nil) + memcacheCommitNodesMeter = metrics.NewRegisteredMeter("hashdb/memcache/commit/nodes", nil) + memcacheCommitBytesMeter = metrics.NewRegisteredMeter("hashdb/memcache/commit/bytes", nil) +) + +// ChildResolver defines the required method to decode the provided +// trie node and iterate the children on top. +type ChildResolver interface { + ForEach(node []byte, onChild func(common.Hash)) +} + +// Config contains the settings for database. +type Config struct { + CleanCacheSize int // Maximum memory allowance (in bytes) for caching clean nodes +} + +// Defaults is the default setting for database if it's not specified. +// Notably, clean cache is disabled explicitly +var Defaults = &Config{ + // Explicitly set clean cache size to 0 to avoid creating fastcache, + // otherwise database must be closed when it's no longer needed to + // prevent memory leak. + CleanCacheSize: 0, +} + +// Database is an intermediate write layer between the trie data structures and +// the disk database. The aim is to accumulate trie writes in-memory and only +// periodically flush a couple tries to disk, garbage collecting the remainder. +// +// Note, the trie Database is **not** thread safe in its mutations, but it **is** +// thread safe in providing individual, independent node access. The rationale +// behind this split design is to provide read access to RPC handlers and sync +// servers even while the trie is executing expensive garbage collection. +type Database struct { + diskdb ethdb.Database // Persistent storage for matured trie nodes + resolver ChildResolver // Resolver for trie node children + + cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs + dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes + oldest common.Hash // Oldest tracked node, flush-list head + newest common.Hash // Newest tracked node, flush-list tail + + gctime time.Duration // Time spent on garbage collection since last commit + gcnodes uint64 // Nodes garbage collected since last commit + gcsize common.StorageSize // Data storage garbage collected since last commit + + flushtime time.Duration // Time spent on data flushing since last commit + flushnodes uint64 // Nodes flushed since last commit + flushsize common.StorageSize // Data storage flushed since last commit + + dirtiesSize common.StorageSize // Storage size of the dirty node cache (exc. metadata) + childrenSize common.StorageSize // Storage size of the external children tracking + lock sync.RWMutex +} + +// cachedNode is all the information we know about a single cached trie node +// in the memory database write layer. +type cachedNode struct { + node []byte // Encoded node blob + parents uint32 // Number of live nodes referencing this one + external map[common.Hash]struct{} // The set of external children + flushPrev common.Hash // Previous node in the flush-list + flushNext common.Hash // Next node in the flush-list +} + +// cachedNodeSize is the raw size of a cachedNode data structure without any +// node data included. It's an approximate size, but should be a lot better +// than not counting them. +var cachedNodeSize = int(reflect.TypeOf(cachedNode{}).Size()) + +// forChildren invokes the callback for all the tracked children of this node, +// both the implicit ones from inside the node as well as the explicit ones +// from outside the node. +func (n *cachedNode) forChildren(resolver ChildResolver, onChild func(hash common.Hash)) { + for child := range n.external { + onChild(child) + } + resolver.ForEach(n.node, onChild) +} + +// New initializes the hash-based node database. +func New(diskdb ethdb.Database, config *Config, resolver ChildResolver) *Database { + if config == nil { + config = Defaults + } + // Initialize the clean cache if the specified cache allowance + // is non-zero. Note, the size is in bytes. + var cleans *fastcache.Cache + + if config.CleanCacheSize > 0 { + cleans = fastcache.New(config.CleanCacheSize) + } + db := &Database{ + diskdb: diskdb, + resolver: resolver, + cleans: cleans, + dirties: make(map[common.Hash]*cachedNode), + } + return db +} + +// DiskDB retrieves the persistent storage backing the trie database. +func (db *Database) DiskDB() ethdb.KeyValueStore { + return db.diskdb +} + +// inserts a simplified trie node into the memory database. +// The blob size must be specified to allow proper size tracking. +// All nodes inserted by this function will be reference tracked +// and in theory should only used for **trie nodes** insertion. +func (db *Database) insert(hash common.Hash, node []byte) { + // If the node's already cached, skip + if _, ok := db.dirties[hash]; ok { + return + } + memcacheDirtyWriteMeter.Mark(int64(len(node))) + + // Create the cached entry for this node + entry := &cachedNode{ + node: node, + flushPrev: db.newest, + } + entry.forChildren(db.resolver, func(child common.Hash) { + if c := db.dirties[child]; c != nil { + c.parents++ + } + }) + db.dirties[hash] = entry + + // Update the flush-list endpoints + if db.oldest == (common.Hash{}) { + db.oldest, db.newest = hash, hash + } else { + db.dirties[db.newest].flushNext, db.newest = hash, hash + } + db.dirtiesSize += common.StorageSize(common.HashLength + len(node)) +} + +// Node retrieves an encoded cached trie node from memory. If it cannot be found +// cached, the method queries the persistent database for the content. +func (db *Database) Node(hash common.Hash) ([]byte, error) { + // It doesn't make sense to retrieve the metaroot + if hash == (common.Hash{}) { + return nil, errors.New("not found") + } + // Retrieve the node from the clean cache if available + if db.cleans != nil { + if enc := db.cleans.Get(nil, hash[:]); enc != nil { + memcacheCleanHitMeter.Mark(1) + memcacheCleanReadMeter.Mark(int64(len(enc))) + return enc, nil + } + } + // Retrieve the node from the dirty cache if available + db.lock.RLock() + dirty := db.dirties[hash] + db.lock.RUnlock() + + if dirty != nil { + memcacheDirtyHitMeter.Mark(1) + memcacheDirtyReadMeter.Mark(int64(len(dirty.node))) + return dirty.node, nil + } + memcacheDirtyMissMeter.Mark(1) + + // Content unavailable in memory, attempt to retrieve from disk + enc := rawdb.ReadLegacyTrieNode(db.diskdb, hash) + if len(enc) != 0 { + if db.cleans != nil { + db.cleans.Set(hash[:], enc) + memcacheCleanMissMeter.Mark(1) + memcacheCleanWriteMeter.Mark(int64(len(enc))) + } + return enc, nil + } + return nil, errors.New("not found") +} + +// Reference adds a new reference from a parent node to a child node. +// This function is used to add reference between internal trie node +// and external node(e.g. storage trie root), all internal trie nodes +// are referenced together by database itself. +func (db *Database) Reference(child common.Hash, parent common.Hash) { + db.lock.Lock() + defer db.lock.Unlock() + + db.reference(child, parent) +} + +// reference is the private locked version of Reference. +func (db *Database) reference(child common.Hash, parent common.Hash) { + // If the node does not exist, it's a node pulled from disk, skip + node, ok := db.dirties[child] + if !ok { + return + } + // The reference is for state root, increase the reference counter. + if parent == (common.Hash{}) { + node.parents += 1 + return + } + // The reference is for external storage trie, don't duplicate if + // the reference is already existent. + if db.dirties[parent].external == nil { + db.dirties[parent].external = make(map[common.Hash]struct{}) + } + if _, ok := db.dirties[parent].external[child]; ok { + return + } + node.parents++ + db.dirties[parent].external[child] = struct{}{} + db.childrenSize += common.HashLength +} + +// Dereference removes an existing reference from a root node. +func (db *Database) Dereference(root common.Hash) { + // Sanity check to ensure that the meta-root is not removed + if root == (common.Hash{}) { + log.Error("Attempted to dereference the trie cache meta root") + return + } + db.lock.Lock() + defer db.lock.Unlock() + + nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now() + db.dereference(root) + + db.gcnodes += uint64(nodes - len(db.dirties)) + db.gcsize += storage - db.dirtiesSize + db.gctime += time.Since(start) + + memcacheGCTimeTimer.Update(time.Since(start)) + memcacheGCBytesMeter.Mark(int64(storage - db.dirtiesSize)) + memcacheGCNodesMeter.Mark(int64(nodes - len(db.dirties))) + + log.Debug("Dereferenced trie from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start), + "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize) +} + +// dereference is the private locked version of Dereference. +func (db *Database) dereference(hash common.Hash) { + // If the hash does not exist, it's a previously committed node. + node, ok := db.dirties[hash] + if !ok { + return + } + // If there are no more references to the node, delete it and cascade + if node.parents > 0 { + // This is a special cornercase where a node loaded from disk (i.e. not in the + // memcache any more) gets reinjected as a new node (short node split into full, + // then reverted into short), causing a cached node to have no parents. That is + // no problem in itself, but don't make maxint parents out of it. + node.parents-- + } + if node.parents == 0 { + // Remove the node from the flush-list + switch hash { + case db.oldest: + db.oldest = node.flushNext + if node.flushNext != (common.Hash{}) { + db.dirties[node.flushNext].flushPrev = common.Hash{} + } + case db.newest: + db.newest = node.flushPrev + if node.flushPrev != (common.Hash{}) { + db.dirties[node.flushPrev].flushNext = common.Hash{} + } + default: + db.dirties[node.flushPrev].flushNext = node.flushNext + db.dirties[node.flushNext].flushPrev = node.flushPrev + } + // Dereference all children and delete the node + node.forChildren(db.resolver, func(child common.Hash) { + db.dereference(child) + }) + delete(db.dirties, hash) + db.dirtiesSize -= common.StorageSize(common.HashLength + len(node.node)) + if node.external != nil { + db.childrenSize -= common.StorageSize(len(node.external) * common.HashLength) + } + } +} + +// Cap iteratively flushes old but still referenced trie nodes until the total +// memory usage goes below the given threshold. +// +// Note, this method is a non-synchronized mutator. It is unsafe to call this +// concurrently with other mutators. +func (db *Database) Cap(limit common.StorageSize) error { + // Create a database batch to flush persistent data out. It is important that + // outside code doesn't see an inconsistent state (referenced data removed from + // memory cache during commit but not yet in persistent storage). This is ensured + // by only uncaching existing data when the database write finalizes. + nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now() + batch := db.diskdb.NewBatch() + + // db.dirtiesSize only contains the useful data in the cache, but when reporting + // the total memory consumption, the maintenance metadata is also needed to be + // counted. + size := db.dirtiesSize + common.StorageSize(len(db.dirties)*cachedNodeSize) + size += db.childrenSize + + // Keep committing nodes from the flush-list until we're below allowance + oldest := db.oldest + for size > limit && oldest != (common.Hash{}) { + // Fetch the oldest referenced node and push into the batch + node := db.dirties[oldest] + rawdb.WriteLegacyTrieNode(batch, oldest, node.node) + + // If we exceeded the ideal batch size, commit and reset + if batch.ValueSize() >= ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + log.Error("Failed to write flush list to disk", "err", err) + return err + } + batch.Reset() + } + // Iterate to the next flush item, or abort if the size cap was achieved. Size + // is the total size, including the useful cached data (hash -> blob), the + // cache item metadata, as well as external children mappings. + size -= common.StorageSize(common.HashLength + len(node.node) + cachedNodeSize) + if node.external != nil { + size -= common.StorageSize(len(node.external) * common.HashLength) + } + oldest = node.flushNext + } + // Flush out any remainder data from the last batch + if err := batch.Write(); err != nil { + log.Error("Failed to write flush list to disk", "err", err) + return err + } + // Write successful, clear out the flushed data + db.lock.Lock() + defer db.lock.Unlock() + + for db.oldest != oldest { + node := db.dirties[db.oldest] + delete(db.dirties, db.oldest) + db.oldest = node.flushNext + + db.dirtiesSize -= common.StorageSize(common.HashLength + len(node.node)) + if node.external != nil { + db.childrenSize -= common.StorageSize(len(node.external) * common.HashLength) + } + } + if db.oldest != (common.Hash{}) { + db.dirties[db.oldest].flushPrev = common.Hash{} + } + db.flushnodes += uint64(nodes - len(db.dirties)) + db.flushsize += storage - db.dirtiesSize + db.flushtime += time.Since(start) + + memcacheFlushTimeTimer.Update(time.Since(start)) + memcacheFlushBytesMeter.Mark(int64(storage - db.dirtiesSize)) + memcacheFlushNodesMeter.Mark(int64(nodes - len(db.dirties))) + + log.Debug("Persisted nodes from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start), + "flushnodes", db.flushnodes, "flushsize", db.flushsize, "flushtime", db.flushtime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize) + + return nil +} + +// Commit iterates over all the children of a particular node, writes them out +// to disk, forcefully tearing down all references in both directions. As a side +// effect, all pre-images accumulated up to this point are also written. +// +// Note, this method is a non-synchronized mutator. It is unsafe to call this +// concurrently with other mutators. +func (db *Database) Commit(node common.Hash, report bool) error { + // Create a database batch to flush persistent data out. It is important that + // outside code doesn't see an inconsistent state (referenced data removed from + // memory cache during commit but not yet in persistent storage). This is ensured + // by only uncaching existing data when the database write finalizes. + start := time.Now() + batch := db.diskdb.NewBatch() + + // Move the trie itself into the batch, flushing if enough data is accumulated + nodes, storage := len(db.dirties), db.dirtiesSize + + uncacher := &cleaner{db} + if err := db.commit(node, batch, uncacher); err != nil { + log.Error("Failed to commit trie from trie database", "err", err) + return err + } + // Trie mostly committed to disk, flush any batch leftovers + if err := batch.Write(); err != nil { + log.Error("Failed to write trie to disk", "err", err) + return err + } + // Uncache any leftovers in the last batch + db.lock.Lock() + defer db.lock.Unlock() + + batch.Replay(uncacher) + batch.Reset() + + // Reset the storage counters and bumped metrics + + memcacheCommitTimeTimer.Update(time.Since(start)) + memcacheCommitBytesMeter.Mark(int64(storage - db.dirtiesSize)) + memcacheCommitNodesMeter.Mark(int64(nodes - len(db.dirties))) + + logger := log.Info + if !report { + logger = log.Debug + } + logger("Persisted trie from memory database", "nodes", nodes-len(db.dirties)+int(db.flushnodes), "size", storage-db.dirtiesSize+db.flushsize, "time", time.Since(start)+db.flushtime, + "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize) + + // Reset the garbage collection statistics + db.gcnodes, db.gcsize, db.gctime = 0, 0, 0 + db.flushnodes, db.flushsize, db.flushtime = 0, 0, 0 + + return nil +} + +// commit is the private locked version of Commit. +func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleaner) error { + // If the node does not exist, it's a previously committed node + node, ok := db.dirties[hash] + if !ok { + return nil + } + var err error + + // Dereference all children and delete the node + node.forChildren(db.resolver, func(child common.Hash) { + if err == nil { + err = db.commit(child, batch, uncacher) + } + }) + if err != nil { + return err + } + // If we've reached an optimal batch size, commit and start over + rawdb.WriteLegacyTrieNode(batch, hash, node.node) + if batch.ValueSize() >= ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + return err + } + db.lock.Lock() + batch.Replay(uncacher) + batch.Reset() + db.lock.Unlock() + } + return nil +} + +// cleaner is a database batch replayer that takes a batch of write operations +// and cleans up the trie database from anything written to disk. +type cleaner struct { + db *Database +} + +// Put reacts to database writes and implements dirty data uncaching. This is the +// post-processing step of a commit operation where the already persisted trie is +// removed from the dirty cache and moved into the clean cache. The reason behind +// the two-phase commit is to ensure data availability while moving from memory +// to disk. +func (c *cleaner) Put(key []byte, rlp []byte) error { + hash := common.BytesToHash(key) + + // If the node does not exist, we're done on this path + node, ok := c.db.dirties[hash] + if !ok { + return nil + } + // Node still exists, remove it from the flush-list + switch hash { + case c.db.oldest: + c.db.oldest = node.flushNext + if node.flushNext != (common.Hash{}) { + c.db.dirties[node.flushNext].flushPrev = common.Hash{} + } + case c.db.newest: + c.db.newest = node.flushPrev + if node.flushPrev != (common.Hash{}) { + c.db.dirties[node.flushPrev].flushNext = common.Hash{} + } + default: + c.db.dirties[node.flushPrev].flushNext = node.flushNext + c.db.dirties[node.flushNext].flushPrev = node.flushPrev + } + // Remove the node from the dirty cache + delete(c.db.dirties, hash) + c.db.dirtiesSize -= common.StorageSize(common.HashLength + len(node.node)) + if node.external != nil { + c.db.childrenSize -= common.StorageSize(len(node.external) * common.HashLength) + } + // Move the flushed node into the clean cache to prevent insta-reloads + if c.db.cleans != nil { + c.db.cleans.Set(hash[:], rlp) + memcacheCleanWriteMeter.Mark(int64(len(rlp))) + } + return nil +} + +func (c *cleaner) Delete(key []byte) error { + panic("not implemented") +} + +// Initialized returns an indicator if state data is already initialized +// in hash-based scheme by checking the presence of genesis state. +func (db *Database) Initialized(genesisRoot common.Hash) bool { + return rawdb.HasLegacyTrieNode(db.diskdb, genesisRoot) +} + +// Update inserts the dirty nodes in provided nodeset into database and link the +// account trie with multiple storage tries if necessary. +// +// root and parent are used for path-based only +func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { + // Ensure the parent state is present and signal a warning if not. + if parent != types.EmptyRootHash { + if blob, _ := db.Node(parent); len(blob) == 0 { + log.Error("parent state is not present") + } + } + db.lock.Lock() + defer db.lock.Unlock() + // Insert dirty nodes into the database. In the same tree, it must be + // ensured that children are inserted first, then parent so that children + // can be linked with their parent correctly. + // + // Note, the storage tries must be flushed before the account trie to + // retain the invariant that children go into the dirty cache first. + var order []common.Hash + for owner := range nodes.Sets { + if owner == (common.Hash{}) { + continue + } + order = append(order, owner) + } + if _, ok := nodes.Sets[common.Hash{}]; ok { + order = append(order, common.Hash{}) + } + for _, owner := range order { + subset := nodes.Sets[owner] + subset.ForEachWithOrder(func(path string, n *trienode.Node) { + if n.IsDeleted() { + return // ignore deletion + } + db.insert(n.Hash, n.Blob) + }) + } + // Link up the account trie and storage trie if the node points + // to an account trie leaf. + if set, present := nodes.Sets[common.Hash{}]; present { + for _, leaf := range set.Leaves { + // Looping node leaf, then reference the leaf node to the root node + var account types.StateAccount + if err := rlp.DecodeBytes(leaf.Blob, &account); err != nil { + return err + } + if account.Root != types.EmptyRootHash { + db.reference(account.Root, leaf.Parent) + } + } + } + return nil +} + +// Close closes the trie database and releases all held resources. +func (db *Database) Close() error { + + if db.cleans != nil { + db.cleans.Reset() + db.cleans = nil + } + return nil +} + +// Size returns the current storage size of the memory cache in front of the +// persistent database layer. +func (db *Database) Size() common.StorageSize { + db.lock.RLock() + defer db.lock.RUnlock() + + // db.dirtiesSize only contains the useful data in the cache, but when reporting + // the total memory consumption, the maintenance metadata is also needed to be + // counted. + var metadataSize = common.StorageSize(len(db.dirties) * cachedNodeSize) + return db.dirtiesSize + db.childrenSize + metadataSize +} + +// Scheme returns the node scheme used in the database. +func (db *Database) Scheme() string { + return rawdb.HashScheme +} + +// Reader retrieves a node reader belonging to the given state root. +func (db *Database) Reader(root common.Hash) (*reader, error) { + if _, err := db.Node(root); err != nil { + return nil, fmt.Errorf("state %#x is not available, %v", root, err) + } + + return &reader{db: db}, nil +} + +// reader is a state reader of Database which implements the Reader interface. +type reader struct { + db *Database +} + +// Node retrieves the trie node with the given node hash. +// No error will be returned if the node is not found. +func (reader *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) { + blob, _ := reader.db.Node(hash) + return blob, nil +} diff --git a/trie/triedb/pathdb/database.go b/trie/triedb/pathdb/database.go new file mode 100644 index 0000000000..7843f78de0 --- /dev/null +++ b/trie/triedb/pathdb/database.go @@ -0,0 +1,504 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "errors" + "fmt" + "io" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" +) + +const ( + // maxDiffLayers is the maximum diff layers allowed in the layer tree. + + maxDiffLayers = 128 + + // defaultCleanSize is the default memory allowance of clean cache. + defaultCleanSize = 16 * 1024 * 1024 + + // maxBufferSize is the maximum memory allowance of node buffer. + // Too large nodebuffer will cause the system to pause for a long + // time when write happens. Also, the largest batch that pebble can + // support is 4GB, node will panic if batch size exceeds this limit. + maxBufferSize = 256 * 1024 * 1024 + + // DefaultBufferSize is the default memory allowance of node buffer + // that aggregates the writes from above until it's flushed into the + // disk. It's meant to be used once the initial sync is finished. + // Do not increase the buffer size arbitrarily, otherwise the system + // pause time will increase when the database writes happen. + DefaultBufferSize = 64 * 1024 * 1024 +) + +// layer is the interface implemented by all state layers which includes some +// public methods and some additional methods for internal usage. +type layer interface { + // Node retrieves the trie node with the node info. An error will be returned + // if the read operation exits abnormally. For example, if the layer is already + // stale, or the associated state is regarded as corrupted. Notably, no error + // will be returned if the requested node is not found in database. + Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) + + // rootHash returns the root hash for which this layer was made. + rootHash() common.Hash + + // stateID returns the associated state id of layer. + stateID() uint64 + + // parentLayer returns the subsequent layer of it, or nil if the disk was reached. + parentLayer() layer + + // update creates a new layer on top of the existing layer diff tree with + // the provided dirty trie nodes along with the state change set. + // + // Note, the maps are retained by the method to avoid copying everything. + update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer + + // journal commits an entire diff hierarchy to disk into a single journal entry. + // This is meant to be used during shutdown to persist the layer without + // flattening everything down (bad for reorgs). + journal(w io.Writer) error +} + +// Config contains the settings for database. +type Config struct { + StateHistory uint64 // Number of recent blocks to maintain state history for + CleanCacheSize int // Maximum memory allowance (in bytes) for caching clean nodes + DirtyCacheSize int // Maximum memory allowance (in bytes) for caching dirty nodes + ReadOnly bool // Flag whether the database is opened in read only mode. +} + +// Defaults contains default settings for Ethereum mainnet. +var Defaults = &Config{ + StateHistory: params.FullImmutabilityThreshold, + CleanCacheSize: defaultCleanSize, + DirtyCacheSize: DefaultBufferSize, +} + +// ReadOnly is the config in order to open database in read only mode. +var ReadOnly = &Config{ReadOnly: true} + +// sanitize checks the provided user configurations and changes anything that's +// unreasonable or unworkable. +func (c *Config) sanitize() { + if c.DirtyCacheSize > maxBufferSize { + log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(c.DirtyCacheSize), "updated", common.StorageSize(maxBufferSize)) + c.DirtyCacheSize = maxBufferSize + } +} + +// Database is a multiple-layered structure for maintaining in-memory trie nodes. +// It consists of one persistent base layer backed by a key-value store, on top +// of which arbitrarily many in-memory diff layers are stacked. The memory diffs +// can form a tree with branching, but the disk layer is singleton and common to +// all. If a reorg goes deeper than the disk layer, a batch of reverse diffs can +// be applied to rollback. The deepest reorg that can be handled depends on the +// amount of state histories tracked in the disk. +// +// At most one readable and writable database can be opened at the same time in +// the whole system which ensures that only one database writer can operate disk +// state. Unexpected open operations can cause the system to panic. +type Database struct { + // readOnly is the flag whether the mutation is allowed to be applied. + // It will be set automatically when the database is journaled during + // the shutdown to reject all following unexpected mutations. + readOnly bool // Flag if database is opened in read only mode + waitSync bool // Flag if database is deactivated due to initial state sync + bufferSize int // Memory allowance (in bytes) for caching dirty nodes + config *Config // Configuration for database + diskdb ethdb.Database // Persistent storage for matured trie nodes + tree *layerTree // The group for all known layers + freezer *rawdb.ResettableFreezer // Freezer for storing trie histories, nil possible in tests + lock sync.RWMutex // Lock to prevent mutations from happening at the same time +} + +// New attempts to load an already existing layer from a persistent key-value +// store (with a number of memory layers from a journal). If the journal is not +// matched with the base persistent layer, all the recorded diff layers are discarded. +func New(diskdb ethdb.Database, config *Config) *Database { + if config == nil { + config = Defaults + } + config.sanitize() + db := &Database{ + readOnly: config.ReadOnly, + bufferSize: config.DirtyCacheSize, + config: config, + diskdb: diskdb, + } + // Construct the layer tree by resolving the in-disk singleton state + // and in-memory layer journal. + db.tree = newLayerTree(db.loadLayers()) + + // Open the freezer for state history if the passed database contains an + // ancient store. Otherwise, all the relevant functionalities are disabled. + // + // Because the freezer can only be opened once at the same time, this + // mechanism also ensures that at most one **non-readOnly** database + // is opened at the same time to prevent accidental mutation. + if ancient, err := diskdb.AncientDatadir(); err == nil && ancient != "" && !db.readOnly { + db.freezer, err = rawdb.NewStateFreezer(ancient, false) + if err != nil { + log.Crit("Failed to open state history freezer", "err", err) + } + + diskLayerID := db.tree.bottom().stateID() + if diskLayerID == 0 { + // Reset the entire state histories in case the trie database is + // not initialized yet, as these state histories are not expected. + frozen, err := db.freezer.Ancients() + if err != nil { + log.Crit("Failed to retrieve head of state history", "err", err) + } + if frozen != 0 { + err := db.freezer.Reset() + if err != nil { + log.Crit("Failed to reset state histories", "err", err) + } + log.Info("Truncated extraneous state history") + } + } else { + // Truncate the extra state histories above in freezer in case + // it's not aligned with the disk layer. + pruned, err := truncateFromHead(db.diskdb, db.freezer, diskLayerID) + if err != nil { + log.Crit("Failed to truncate extra state histories", "err", err) + } + if pruned != 0 { + log.Warn("Truncated extra state histories", "number", pruned) + } + } + } + // Disable database in case node is still in the initial state sync stage. + if rawdb.ReadSnapSyncStatusFlag(diskdb) == rawdb.StateSyncRunning && !db.readOnly { + if err := db.Disable(); err != nil { + log.Crit("Failed to disable database", "err", err) // impossible to happen + } + } + log.Warn("Path-based state scheme is an experimental feature") + return db +} + +// Reader retrieves a layer belonging to the given state root. +func (db *Database) Reader(root common.Hash) (layer, error) { + l := db.tree.get(root) + if l == nil { + return nil, fmt.Errorf("state %#x is not available", root) + } + return l, nil +} + +// Update adds a new layer into the tree, if that can be linked to an existing +// old parent. It is disallowed to insert a disk layer (the origin of all). Apart +// from that this function will flatten the extra diff layers at bottom into disk +// to only keep 128 diff layerReaders in memory by default. +// +// The passed in maps(nodes, states) will be retained to avoid copying everything. +// Therefore, these maps must not be changed afterwards. +func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { + // Hold the lock to prevent concurrent mutations. + db.lock.Lock() + defer db.lock.Unlock() + + // Short circuit if the mutation is not allowed. + if err := db.modifyAllowed(); err != nil { + return err + } + if err := db.tree.add(root, parentRoot, block, nodes, states); err != nil { + return err + } + + // Keep 128 diff layers in the memory, persistent layer is 129th. + // - head layer is paired with HEAD state + // - head-1 layer is paired with HEAD-1 state + // - ... + // - head-127 layer(bottom-most diff layer) is paired with HEAD-127 state + // - head-128 layer(disk layer) is paired with HEAD-128 state + + // If the number of diff layers exceeds 128, all the excess diff layers will flattened down + return db.tree.cap(root, maxDiffLayers) +} + +// Commit traverses downwards the layer tree from a specified layer with the +// provided state root and all the layers below are flattened downwards. +// It can be used alone and mostly for test purposes. +func (db *Database) Commit(root common.Hash, report bool) error { + // Hold the lock to prevent concurrent mutations. + db.lock.Lock() + defer db.lock.Unlock() + + // Short circuit if the mutation is not allowed. + if err := db.modifyAllowed(); err != nil { + return err + } + return db.tree.cap(root, 0) +} + +// Disable deactivates the database and invalidates all available state layers +// as stale to prevent access to the persistent state, which is in the syncing +// stage. +func (db *Database) Disable() error { + db.lock.Lock() + defer db.lock.Unlock() + + // Short circuit if the database is in read only mode. + if db.readOnly { + return errDatabaseReadOnly + } + // Prevent duplicated disable operation. + if db.waitSync { + log.Error("Reject duplicated disable operation") + return nil + } + db.waitSync = true + + // Mark the disk layer as stale to prevent access to persistent state. + db.tree.bottom().markStale() + + // Write the initial sync flag to persist it across restarts. + rawdb.WriteSnapSyncStatusFlag(db.diskdb, rawdb.StateSyncRunning) + log.Info("Disabled trie database due to state sync") + return nil +} + +// Enable activates database and resets the state tree with the provided persistent +// state root once the state sync is finished. +func (db *Database) Enable(root common.Hash) error { + db.lock.Lock() + defer db.lock.Unlock() + + // Short circuit if the database is in read only mode. + if db.readOnly { + return errDatabaseReadOnly + } + // Ensure the provided state root matches the stored one. + root = types.TrieRootHash(root) + _, stored := rawdb.ReadAccountTrieNode(db.diskdb, nil) + if stored != root { + return fmt.Errorf("state root mismatch: stored %x, synced %x", stored, root) + } + // Drop the stale state journal in persistent database and + // reset the persistent state id back to zero. + batch := db.diskdb.NewBatch() + rawdb.DeleteTrieJournal(batch) + rawdb.WritePersistentStateID(batch, 0) + if err := batch.Write(); err != nil { + return err + } + + // Clean up all state histories in freezer. Theoretically + // all root->id mappings should be removed as well. Since + // mappings can be huge and might take a while to clear + // them, just leave them in disk and wait for overwriting. + if db.freezer != nil { + if err := db.freezer.Reset(); err != nil { + return err + } + } + + // Re-construct a new disk layer backed by persistent state + // with **empty clean cache and node buffer**. + db.tree.reset(newDiskLayer(root, 0, db, nil, newNodeBuffer(db.bufferSize, nil, 0))) + + // Re-enable the database as the final step. + db.waitSync = false + rawdb.WriteSnapSyncStatusFlag(db.diskdb, rawdb.StateSyncFinished) + log.Info("Rebuilt trie database", "root", root) + return nil +} + +// Recover rollbacks the database to a specified historical point. +// The state is supported as the rollback destination only if it's +// canonical state and the corresponding trie histories are existent. +func (db *Database) Recover(root common.Hash, loader triestate.TrieLoader) error { + db.lock.Lock() + defer db.lock.Unlock() + + // Short circuit if rollback operation is not supported. + if err := db.modifyAllowed(); err != nil { + return err + } + if db.freezer == nil { + return errors.New("state rollback is non-supported") + } + + // Short circuit if the target state is not recoverable. + root = types.TrieRootHash(root) + if !db.Recoverable(root) { + return errStateUnrecoverable + } + + // Apply the state histories upon the disk layer in order. + var ( + start = time.Now() + dl = db.tree.bottom() + ) + for dl.rootHash() != root { + h, err := readHistory(db.freezer, dl.stateID()) + if err != nil { + return err + } + dl, err = dl.revert(h, loader) + if err != nil { + return err + } + // reset layer with newly created disk layer. It must be + // done after each revert operation, otherwise the new + // disk layer won't be accessible from outside. + db.tree.reset(dl) + } + rawdb.DeleteTrieJournal(db.diskdb) + _, err := truncateFromHead(db.diskdb, db.freezer, dl.stateID()) + if err != nil { + return err + } + log.Debug("Recovered state", "root", root, "elapsed", common.PrettyDuration(time.Since(start))) + return nil +} + +// Recoverable returns the indicator if the specified state is recoverable. +func (db *Database) Recoverable(root common.Hash) bool { + // Ensure the requested state is a known state. + root = types.TrieRootHash(root) + id := rawdb.ReadStateID(db.diskdb, root) + if id == nil { + return false + } + + // Recoverable state must below the disk layer. The recoverable + // state only refers the state that is currently not available, + // but can be restored by applying state history. + dl := db.tree.bottom() + if *id >= dl.stateID() { + return false + } + // This is a temporary workaround for the unavailability of the freezer in + // dev mode. As a consequence, the Pathdb loses the ability for deep reorg + // in certain cases. + // TODO(rjl493456442): Implement the in-memory ancient store. + if db.freezer == nil { + return false + } + // Ensure the requested state is a canonical state and all state + // histories in range [id+1, disklayer.ID] are present and complete. + return checkHistories(db.freezer, *id+1, dl.stateID()-*id, func(m *meta) error { + if m.parent != root { + return errors.New("unexpected state history") + } + if len(m.incomplete) > 0 { + return errors.New("incomplete state history") + } + root = m.root + return nil + }) == nil +} + +// Initialized returns an indicator if the state data is already +// initialized in path-based scheme. +func (db *Database) Initialized(genesisRoot common.Hash) bool { + var inited bool + db.tree.forEach(func(layer layer) { + if layer.rootHash() != types.EmptyRootHash { + inited = true + } + }) + if !inited { + inited = rawdb.ReadSnapSyncStatusFlag(db.diskdb) != rawdb.StateSyncUnknown + } + return inited +} + +// Size returns the current storage size of the memory cache in front of the +// persistent database layer. +func (db *Database) Size() (size common.StorageSize) { + db.tree.forEach(func(layer layer) { + if diff, ok := layer.(*diffLayer); ok { + size += common.StorageSize(diff.memory) + } + if disk, ok := layer.(*diskLayer); ok { + size += disk.size() + } + }) + return size +} + +// SetBufferSize sets the node buffer size to the provided value(in bytes). +func (db *Database) SetBufferSize(size int) error { + db.lock.Lock() + defer db.lock.Unlock() + + if size > maxBufferSize { + log.Info("Capped node buffer size", "provided", common.StorageSize(size), "adjusted", common.StorageSize(maxBufferSize)) + size = maxBufferSize + } + db.bufferSize = size + + return db.tree.bottom().setBufferSize(db.bufferSize) +} + +// DiskDB retrieves the persistent storage backing the trie database. +func (db *Database) DiskDB() ethdb.KeyValueStore { + return db.diskdb +} + +// Scheme returns the node scheme used in the database. +func (db *Database) Scheme() string { + return rawdb.PathScheme +} + +// Close closes the trie database and the held freezer. +func (db *Database) Close() error { + db.lock.Lock() + defer db.lock.Unlock() + + // Set the database to read-only mode to prevent all + // following mutations. + + db.readOnly = true + + // Release the memory held by clean cache. + db.tree.bottom().resetCache() + + // Close the attached state history freezer. + if db.freezer == nil { + return nil + } + return db.freezer.Close() +} + +// modifyAllowed returns the indicator if mutation is allowed. This function +// assumes the db.lock is already held. +func (db *Database) modifyAllowed() error { + if db.readOnly { + return errDatabaseReadOnly + } + if db.waitSync { + return errDatabaseWaitSync + } + return nil +} diff --git a/trie/triedb/pathdb/database_test.go b/trie/triedb/pathdb/database_test.go new file mode 100644 index 0000000000..d4ef368c71 --- /dev/null +++ b/trie/triedb/pathdb/database_test.go @@ -0,0 +1,609 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "errors" + "fmt" + "math/big" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/internal/testrand" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/testutil" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" + "golang.org/x/exp/rand" +) + +func generateAccount(storageRoot common.Hash) types.StateAccount { + return types.StateAccount{ + Nonce: uint64(rand.Intn(100)), + Balance: new(big.Int).SetUint64(rand.Uint64()), + CodeHash: testrand.Bytes(32), + Root: storageRoot, + } +} + +func updateTrie(addrHash common.Hash, root common.Hash, dirties, cleans map[common.Hash][]byte) (common.Hash, *trienode.NodeSet) { + h, err := newTestHasher(addrHash, root, cleans) + if err != nil { + panic(fmt.Errorf("failed to create hasher, err: %w", err)) + } + for key, val := range dirties { + if len(val) == 0 { + h.TryDelete(key.Bytes()) + } else { + h.TryUpdate(key.Bytes(), val) + } + } + root, nodes, _ := h.Commit(false) + return root, nodes +} + +const ( + createAccountOp int = iota + modifyAccountOp + deleteAccountOp + opLen +) + +type genctx struct { + accounts map[common.Hash][]byte + storages map[common.Hash]map[common.Hash][]byte + accountOrigin map[common.Address][]byte + storageOrigin map[common.Address]map[common.Hash][]byte + nodes *trienode.MergedNodeSet +} + +func newCtx() *genctx { + return &genctx{ + accounts: make(map[common.Hash][]byte), + storages: make(map[common.Hash]map[common.Hash][]byte), + accountOrigin: make(map[common.Address][]byte), + storageOrigin: make(map[common.Address]map[common.Hash][]byte), + nodes: trienode.NewMergedNodeSet(), + } +} + +type tester struct { + db *Database + roots []common.Hash + preimages map[common.Hash]common.Address + accounts map[common.Hash][]byte + storages map[common.Hash]map[common.Hash][]byte + + // state snapshots + snapAccounts map[common.Hash]map[common.Hash][]byte + snapStorages map[common.Hash]map[common.Hash]map[common.Hash][]byte +} + +func newTester(t *testing.T, historyLimit uint64) *tester { + var ( + disk, _ = rawdb.NewDatabaseWithFreezer(rawdb.NewMemoryDatabase(), t.TempDir(), "", false) + db = New(disk, &Config{ + StateHistory: historyLimit, + CleanCacheSize: 256 * 1024, + DirtyCacheSize: 256 * 1024, + }) + obj = &tester{ + db: db, + preimages: make(map[common.Hash]common.Address), + accounts: make(map[common.Hash][]byte), + storages: make(map[common.Hash]map[common.Hash][]byte), + snapAccounts: make(map[common.Hash]map[common.Hash][]byte), + snapStorages: make(map[common.Hash]map[common.Hash]map[common.Hash][]byte), + } + ) + for i := 0; i < 2*128; i++ { + var parent = types.EmptyRootHash + if len(obj.roots) != 0 { + parent = obj.roots[len(obj.roots)-1] + } + root, nodes, states := obj.generate(parent) + if err := db.Update(root, parent, uint64(i), nodes, states); err != nil { + panic(fmt.Errorf("failed to update state changes, err: %w", err)) + } + obj.roots = append(obj.roots, root) + } + return obj +} + +func (t *tester) release() { + t.db.Close() + t.db.diskdb.Close() +} + +func (t *tester) randAccount() (common.Address, []byte) { + for addrHash, account := range t.accounts { + return t.preimages[addrHash], account + } + return common.Address{}, nil +} + +func (t *tester) generateStorage(ctx *genctx, addr common.Address) common.Hash { + var ( + addrHash = crypto.Keccak256Hash(addr.Bytes()) + storage = make(map[common.Hash][]byte) + origin = make(map[common.Hash][]byte) + ) + for i := 0; i < 10; i++ { + v, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(testutil.RandBytes(32))) + hash := testutil.RandomHash() + + storage[hash] = v + origin[hash] = nil + } + root, set := updateTrie(addrHash, types.EmptyRootHash, storage, nil) + + ctx.storages[addrHash] = storage + ctx.storageOrigin[addr] = origin + ctx.nodes.Merge(set) + return root +} + +func (t *tester) mutateStorage(ctx *genctx, addr common.Address, root common.Hash) common.Hash { + var ( + addrHash = crypto.Keccak256Hash(addr.Bytes()) + storage = make(map[common.Hash][]byte) + origin = make(map[common.Hash][]byte) + ) + for hash, val := range t.storages[addrHash] { + origin[hash] = val + storage[hash] = nil + + if len(origin) == 3 { + break + } + } + for i := 0; i < 3; i++ { + v, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(testutil.RandBytes(32))) + hash := testutil.RandomHash() + + storage[hash] = v + origin[hash] = nil + } + root, set := updateTrie(crypto.Keccak256Hash(addr.Bytes()), root, storage, t.storages[addrHash]) + + ctx.storages[addrHash] = storage + ctx.storageOrigin[addr] = origin + ctx.nodes.Merge(set) + return root +} + +func (t *tester) clearStorage(ctx *genctx, addr common.Address, root common.Hash) common.Hash { + var ( + addrHash = crypto.Keccak256Hash(addr.Bytes()) + storage = make(map[common.Hash][]byte) + origin = make(map[common.Hash][]byte) + ) + for hash, val := range t.storages[addrHash] { + origin[hash] = val + storage[hash] = nil + } + root, set := updateTrie(addrHash, root, storage, t.storages[addrHash]) + if root != types.EmptyRootHash { + panic("failed to clear storage trie") + } + ctx.storages[addrHash] = storage + ctx.storageOrigin[addr] = origin + ctx.nodes.Merge(set) + return root +} + +func (t *tester) generate(parent common.Hash) (common.Hash, *trienode.MergedNodeSet, *triestate.Set) { + var ( + ctx = newCtx() + dirties = make(map[common.Hash]struct{}) + ) + for i := 0; i < 20; i++ { + switch rand.Intn(opLen) { + case createAccountOp: + // account creation + addr := testutil.RandomAddress() + addrHash := crypto.Keccak256Hash(addr.Bytes()) + if _, ok := t.accounts[addrHash]; ok { + continue + } + if _, ok := dirties[addrHash]; ok { + continue + } + dirties[addrHash] = struct{}{} + + root := t.generateStorage(ctx, addr) + ctx.accounts[addrHash] = types.SlimAccountRLP(generateAccount(root)) + ctx.accountOrigin[addr] = nil + t.preimages[addrHash] = addr + + case modifyAccountOp: + // account mutation + addr, account := t.randAccount() + if addr == (common.Address{}) { + continue + } + addrHash := crypto.Keccak256Hash(addr.Bytes()) + if _, ok := dirties[addrHash]; ok { + continue + } + dirties[addrHash] = struct{}{} + + acct, _ := types.FullAccount(account) + stRoot := t.mutateStorage(ctx, addr, acct.Root) + newAccount := types.SlimAccountRLP(generateAccount(stRoot)) + + ctx.accounts[addrHash] = newAccount + ctx.accountOrigin[addr] = account + + case deleteAccountOp: + // account deletion + addr, account := t.randAccount() + if addr == (common.Address{}) { + continue + } + addrHash := crypto.Keccak256Hash(addr.Bytes()) + if _, ok := dirties[addrHash]; ok { + continue + } + dirties[addrHash] = struct{}{} + + acct, _ := types.FullAccount(account) + if acct.Root != types.EmptyRootHash { + t.clearStorage(ctx, addr, acct.Root) + } + ctx.accounts[addrHash] = nil + ctx.accountOrigin[addr] = account + } + } + root, set := updateTrie(common.Hash{}, parent, ctx.accounts, t.accounts) + ctx.nodes.Merge(set) + + // Save state snapshot before commit + t.snapAccounts[parent] = copyAccounts(t.accounts) + t.snapStorages[parent] = copyStorages(t.storages) + + // Commit all changes to live state set + for addrHash, account := range ctx.accounts { + if len(account) == 0 { + delete(t.accounts, addrHash) + } else { + t.accounts[addrHash] = account + } + } + for addrHash, slots := range ctx.storages { + if _, ok := t.storages[addrHash]; !ok { + t.storages[addrHash] = make(map[common.Hash][]byte) + } + for sHash, slot := range slots { + if len(slot) == 0 { + delete(t.storages[addrHash], sHash) + } else { + t.storages[addrHash][sHash] = slot + } + } + } + return root, ctx.nodes, triestate.New(ctx.accountOrigin, ctx.storageOrigin, nil) +} + +// lastRoot returns the latest root hash, or empty if nothing is cached. +func (t *tester) lastHash() common.Hash { + if len(t.roots) == 0 { + return common.Hash{} + } + return t.roots[len(t.roots)-1] +} + +func (t *tester) verifyState(root common.Hash) error { + reader, err := t.db.Reader(root) + if err != nil { + return err + } + _, err = reader.Node(common.Hash{}, nil, root) + if err != nil { + return errors.New("root node is not available") + } + for addrHash, account := range t.snapAccounts[root] { + blob, err := reader.Node(common.Hash{}, addrHash.Bytes(), crypto.Keccak256Hash(account)) + if err != nil || !bytes.Equal(blob, account) { + return fmt.Errorf("account is mismatched: %w", err) + } + } + for addrHash, slots := range t.snapStorages[root] { + for hash, slot := range slots { + blob, err := reader.Node(addrHash, hash.Bytes(), crypto.Keccak256Hash(slot)) + if err != nil || !bytes.Equal(blob, slot) { + return fmt.Errorf("slot is mismatched: %w", err) + } + } + } + return nil +} + +func (t *tester) verifyHistory() error { + bottom := t.bottomIndex() + for i, root := range t.roots { + // The state history related to the state above disk layer should not exist. + if i > bottom { + _, err := readHistory(t.db.freezer, uint64(i+1)) + if err == nil { + return errors.New("unexpected state history") + } + continue + } + // The state history related to the state below or equal to the disk layer + // should exist. + obj, err := readHistory(t.db.freezer, uint64(i+1)) + if err != nil { + return err + } + parent := types.EmptyRootHash + if i != 0 { + parent = t.roots[i-1] + } + if obj.meta.parent != parent { + return fmt.Errorf("unexpected parent, want: %x, got: %x", parent, obj.meta.parent) + } + if obj.meta.root != root { + return fmt.Errorf("unexpected root, want: %x, got: %x", root, obj.meta.root) + } + } + return nil +} + +// bottomIndex returns the index of current disk layer. +func (t *tester) bottomIndex() int { + bottom := t.db.tree.bottom() + for i := 0; i < len(t.roots); i++ { + if t.roots[i] == bottom.rootHash() { + return i + } + } + return -1 +} + +func TestDatabaseRollback(t *testing.T) { + // Verify state histories + tester := newTester(t, 0) + defer tester.release() + + if err := tester.verifyHistory(); err != nil { + t.Fatalf("Invalid state history, err: %v", err) + } + // Revert database from top to bottom + for i := tester.bottomIndex(); i >= 0; i-- { + root := tester.roots[i] + parent := types.EmptyRootHash + if i > 0 { + parent = tester.roots[i-1] + } + loader := newHashLoader(tester.snapAccounts[root], tester.snapStorages[root]) + if err := tester.db.Recover(parent, loader); err != nil { + t.Fatalf("Failed to revert db, err: %v", err) + } + tester.verifyState(parent) + } + if tester.db.tree.len() != 1 { + t.Fatal("Only disk layer is expected") + } +} + +func TestDatabaseRecoverable(t *testing.T) { + var ( + tester = newTester(t, 0) + index = tester.bottomIndex() + ) + defer tester.release() + + var cases = []struct { + root common.Hash + expect bool + }{ + // Unknown state should be unrecoverable + {common.Hash{0x1}, false}, + + // Initial state should be recoverable + {types.EmptyRootHash, true}, + + // Initial state should be recoverable + {common.Hash{}, true}, + + // Layers below current disk layer are recoverable + {tester.roots[index-1], true}, + + // Disklayer itself is not recoverable, since it's + // available for accessing. + {tester.roots[index], false}, + + // Layers above current disk layer are not recoverable + // since they are available for accessing. + {tester.roots[index+1], false}, + } + for i, c := range cases { + result := tester.db.Recoverable(c.root) + if result != c.expect { + t.Fatalf("case: %d, unexpected result, want %t, got %t", i, c.expect, result) + } + } +} + +func TestDisable(t *testing.T) { + tester := newTester(t, 0) + defer tester.release() + + _, stored := rawdb.ReadAccountTrieNode(tester.db.diskdb, nil) + if err := tester.db.Disable(); err != nil { + t.Fatal("Failed to deactivate database") + } + if err := tester.db.Enable(types.EmptyRootHash); err == nil { + t.Fatalf("Invalid activation should be rejected") + } + if err := tester.db.Enable(stored); err != nil { + t.Fatal("Failed to activate database") + } + + // Ensure journal is deleted from disk + if blob := rawdb.ReadTrieJournal(tester.db.diskdb); len(blob) != 0 { + t.Fatal("Failed to clean journal") + } + // Ensure all trie histories are removed + n, err := tester.db.freezer.Ancients() + if err != nil { + t.Fatal("Failed to clean state history") + } + if n != 0 { + t.Fatal("Failed to clean state history") + } + // Verify layer tree structure, single disk layer is expected + if tester.db.tree.len() != 1 { + t.Fatalf("Extra layer kept %d", tester.db.tree.len()) + } + if tester.db.tree.bottom().rootHash() != stored { + t.Fatalf("Root hash is not matched exp %x got %x", stored, tester.db.tree.bottom().rootHash()) + } +} + +func TestCommit(t *testing.T) { + tester := newTester(t, 0) + defer tester.release() + + if err := tester.db.Commit(tester.lastHash(), false); err != nil { + t.Fatalf("Failed to cap database, err: %v", err) + } + // Verify layer tree structure, single disk layer is expected + if tester.db.tree.len() != 1 { + t.Fatal("Layer tree structure is invalid") + } + if tester.db.tree.bottom().rootHash() != tester.lastHash() { + t.Fatal("Layer tree structure is invalid") + } + // Verify states + if err := tester.verifyState(tester.lastHash()); err != nil { + t.Fatalf("State is invalid, err: %v", err) + } + // Verify state histories + if err := tester.verifyHistory(); err != nil { + t.Fatalf("State history is invalid, err: %v", err) + } +} + +func TestJournal(t *testing.T) { + tester := newTester(t, 0) + defer tester.release() + + if err := tester.db.Journal(tester.lastHash()); err != nil { + t.Errorf("Failed to journal, err: %v", err) + } + tester.db.Close() + tester.db = New(tester.db.diskdb, nil) + + // Verify states including disk layer and all diff on top. + for i := 0; i < len(tester.roots); i++ { + if i >= tester.bottomIndex() { + if err := tester.verifyState(tester.roots[i]); err != nil { + t.Fatalf("Invalid state, err: %v", err) + } + continue + } + if err := tester.verifyState(tester.roots[i]); err == nil { + t.Fatal("Unexpected state") + } + } +} + +func TestCorruptedJournal(t *testing.T) { + tester := newTester(t, 0) + defer tester.release() + + if err := tester.db.Journal(tester.lastHash()); err != nil { + t.Errorf("Failed to journal, err: %v", err) + } + tester.db.Close() + _, root := rawdb.ReadAccountTrieNode(tester.db.diskdb, nil) + + // Mutate the journal in disk, it should be regarded as invalid + blob := rawdb.ReadTrieJournal(tester.db.diskdb) + blob[0] = 1 + rawdb.WriteTrieJournal(tester.db.diskdb, blob) + + // Verify states, all not-yet-written states should be discarded + tester.db = New(tester.db.diskdb, nil) + for i := 0; i < len(tester.roots); i++ { + if tester.roots[i] == root { + if err := tester.verifyState(root); err != nil { + t.Fatalf("Disk state is corrupted, err: %v", err) + } + continue + } + if err := tester.verifyState(tester.roots[i]); err == nil { + t.Fatal("Unexpected state") + } + } +} + +// TestTailTruncateHistory function is designed to test a specific edge case where, +// when history objects are removed from the end, it should trigger a state flush +// if the ID of the new tail object is even higher than the persisted state ID. +// +// For example, let's say the ID of the persistent state is 10, and the current +// history objects range from ID(5) to ID(15). As we accumulate six more objects, +// the history will expand to cover ID(11) to ID(21). ID(11) then becomes the +// oldest history object, and its ID is even higher than the stored state. +// +// In this scenario, it is mandatory to update the persistent state before +// truncating the tail histories. This ensures that the ID of the persistent state +// always falls within the range of [oldest-history-id, latest-history-id]. +func TestTailTruncateHistory(t *testing.T) { + tester := newTester(t, 10) + defer tester.release() + + tester.db.Close() + tester.db = New(tester.db.diskdb, &Config{StateHistory: 10}) + + head, err := tester.db.freezer.Ancients() + if err != nil { + t.Fatalf("Failed to obtain freezer head") + } + stored := rawdb.ReadPersistentStateID(tester.db.diskdb) + if head != stored { + t.Fatalf("Failed to truncate excess history object above, stored: %d, head: %d", stored, head) + } +} + +// copyAccounts returns a deep-copied account set of the provided one. +func copyAccounts(set map[common.Hash][]byte) map[common.Hash][]byte { + copied := make(map[common.Hash][]byte, len(set)) + for key, val := range set { + copied[key] = common.CopyBytes(val) + } + return copied +} + +// copyStorages returns a deep-copied storage set of the provided one. +func copyStorages(set map[common.Hash]map[common.Hash][]byte) map[common.Hash]map[common.Hash][]byte { + copied := make(map[common.Hash]map[common.Hash][]byte, len(set)) + for addrHash, subset := range set { + copied[addrHash] = make(map[common.Hash][]byte, len(subset)) + for key, val := range subset { + copied[addrHash][key] = common.CopyBytes(val) + } + } + return copied +} diff --git a/trie/triedb/pathdb/difflayer.go b/trie/triedb/pathdb/difflayer.go new file mode 100644 index 0000000000..4cd4a8f376 --- /dev/null +++ b/trie/triedb/pathdb/difflayer.go @@ -0,0 +1,178 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "fmt" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" +) + +// diffLayer represents a collection of modifications made to the in-memory tries +// along with associated state changes after running a block on top. +// +// The goal of a diff layer is to act as a journal, tracking recent modifications +// made to the state, that have not yet graduated into a semi-immutable state. +type diffLayer struct { + // Immutables + root common.Hash // Root hash to which this layer diff belongs to + id uint64 // Corresponding state id + block uint64 // Associated block number + nodes map[common.Hash]map[string]*trienode.Node // Cached trie nodes indexed by owner and path + states *triestate.Set // Associated state change set for building history + memory uint64 // Approximate guess as to how much memory we use + + // Parent layer modified by this one, never nil, **can be changed** + parent layer + + lock sync.RWMutex // Lock used to protect parent +} + +// newDiffLayer creates a new diff layer on top of an existing layer. +func newDiffLayer(parent layer, root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer { + var ( + size int64 + count int + ) + dl := &diffLayer{ + root: root, + id: id, + block: block, + nodes: nodes, + states: states, + parent: parent, + } + for _, subset := range nodes { + for path, n := range subset { + dl.memory += uint64(n.Size() + len(path)) + size += int64(len(n.Blob) + len(path)) + } + count += len(subset) + } + if states != nil { + dl.memory += uint64(states.Size()) + } + dirtyWriteMeter.Mark(size) + diffLayerNodesMeter.Mark(int64(count)) + diffLayerBytesMeter.Mark(int64(dl.memory)) + log.Debug("Created new diff layer", "id", id, "block", block, "nodes", count, "size", common.StorageSize(dl.memory)) + return dl +} + +// rootHash implements the layer interface, returning the root hash of +// corresponding state. +func (dl *diffLayer) rootHash() common.Hash { + return dl.root +} + +// stateID implements the layer interface, returning the state id of the layer. +func (dl *diffLayer) stateID() uint64 { + return dl.id +} + +// parentLayer implements the layer interface, returning the subsequent +// layer of the diff layer. +func (dl *diffLayer) parentLayer() layer { + dl.lock.RLock() + defer dl.lock.RUnlock() + + return dl.parent +} + +// node retrieves the node with provided node information. +// It's the internal version of Node function with additional accessed layer tracked. +// No error will be returned if node is not found. +func (dl *diffLayer) node(owner common.Hash, path []byte, hash common.Hash, depth int) ([]byte, error) { + // hold the lock to prevent the parent layer from being changed + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the trie node is known locally, return it + subset, ok := dl.nodes[owner] + if ok { + n, ok := subset[string(path)] + if ok { + // If the trie node is not hash matched, or marked as removed, + // bubble up an error here. It shouldn't happen at all. + if n.Hash != hash { + dirtyFalseMeter.Mark(1) + log.Error("Unexpected trie node in diff layer", "owner", owner, "path", path, "expect", hash, "got", n.Hash) + return nil, newUnexpectedNodeError("diff", hash, n.Hash, owner, path) + } + dirtyHitMeter.Mark(1) + dirtyNodeHitDepthHist.Update(int64(depth)) + dirtyReadMeter.Mark(int64(len(n.Blob))) + return n.Blob, nil + } + } + + // Trie node unknown to this layer, resolve from parent with lower depth + if diff, ok := dl.parent.(*diffLayer); ok { + return diff.node(owner, path, hash, depth+1) + } + + // Parent is a disk layer, resolve from there + return dl.parent.Node(owner, path, hash) +} + +// Node implements the layer interface, retrieving the trie node blob with the +// provided node information. No error will be returned if the node is not found. +func (dl *diffLayer) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) { + return dl.node(owner, path, hash, 0) +} + +// update implements the layer interface, creating a new layer on top of the +// existing layer tree with the specified data items. +func (dl *diffLayer) update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer { + return newDiffLayer(dl, root, id, block, nodes, states) +} + +// persist flushes the diff layer and all its parent layers to disk layer. +func (dl *diffLayer) persist(force bool) (layer, error) { + if parent, ok := dl.parentLayer().(*diffLayer); ok { + // Hold the lock to prevent any read operation until the new + // parent is linked correctly. + dl.lock.Lock() + + // The merging of diff layers starts at the bottom-most layer, + // therefore we recurse down here, flattening on the way up + // (diffToDisk). + result, err := parent.persist(force) + if err != nil { + dl.lock.Unlock() + return nil, err + } + + dl.parent = result + dl.lock.Unlock() + } + return diffToDisk(dl, force) +} + +// diffToDisk merges a bottom-most diff into the persistent disk layer underneath +// it. The method will panic if called onto a non-bottom-most diff layer. +func diffToDisk(layer *diffLayer, force bool) (layer, error) { + disk, ok := layer.parentLayer().(*diskLayer) + if !ok { + panic(fmt.Sprintf("unknown layer type: %T", layer.parentLayer())) + } + return disk.commit(layer, force) +} diff --git a/trie/triedb/pathdb/difflayer_test.go b/trie/triedb/pathdb/difflayer_test.go new file mode 100644 index 0000000000..6b33ae758a --- /dev/null +++ b/trie/triedb/pathdb/difflayer_test.go @@ -0,0 +1,171 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/trie/testutil" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +func emptyLayer() *diskLayer { + return &diskLayer{ + db: New(rawdb.NewMemoryDatabase(), nil), + buffer: newNodeBuffer(DefaultBufferSize, nil, 0), + } +} + +// goos: darwin +// goarch: arm64 +// pkg: github.com/ethereum/go-ethereum/trie +// BenchmarkSearch128Layers +// BenchmarkSearch128Layers-8 243826 4755 ns/op +func BenchmarkSearch128Layers(b *testing.B) { benchmarkSearch(b, 0, 128) } + +// goos: darwin +// goarch: arm64 +// pkg: github.com/ethereum/go-ethereum/trie +// BenchmarkSearch512Layers +// BenchmarkSearch512Layers-8 49686 24256 ns/op +func BenchmarkSearch512Layers(b *testing.B) { benchmarkSearch(b, 0, 512) } + +// goos: darwin +// goarch: arm64 +// pkg: github.com/ethereum/go-ethereum/trie +// BenchmarkSearch1Layer +// BenchmarkSearch1Layer-8 14062725 88.40 ns/op +func BenchmarkSearch1Layer(b *testing.B) { benchmarkSearch(b, 127, 128) } + +func benchmarkSearch(b *testing.B, depth int, total int) { + var ( + npath []byte + nhash common.Hash + nblob []byte + ) + // First, we set up 128 diff layers, with 3K items each + fill := func(parent layer, index int) *diffLayer { + nodes := make(map[common.Hash]map[string]*trienode.Node) + nodes[common.Hash{}] = make(map[string]*trienode.Node) + for i := 0; i < 3000; i++ { + var ( + path = testutil.RandBytes(32) + node = testutil.RandomNode() + ) + nodes[common.Hash{}][string(path)] = trienode.New(node.Hash, node.Blob) + if npath == nil && depth == index { + npath = common.CopyBytes(path) + nblob = common.CopyBytes(node.Blob) + nhash = node.Hash + } + } + return newDiffLayer(parent, common.Hash{}, 0, 0, nodes, nil) + } + var layer layer + layer = emptyLayer() + for i := 0; i < total; i++ { + layer = fill(layer, i) + } + b.ResetTimer() + + var ( + have []byte + err error + ) + for i := 0; i < b.N; i++ { + have, err = layer.Node(common.Hash{}, npath, nhash) + if err != nil { + b.Fatal(err) + } + } + if !bytes.Equal(have, nblob) { + b.Fatalf("have %x want %x", have, nblob) + } +} + +// goos: darwin +// goarch: arm64 +// pkg: github.com/ethereum/go-ethereum/trie +// BenchmarkPersist +// BenchmarkPersist-8 10 111252975 ns/op +func BenchmarkPersist(b *testing.B) { + // First, we set up 128 diff layers, with 3K items each + fill := func(parent layer) *diffLayer { + nodes := make(map[common.Hash]map[string]*trienode.Node) + nodes[common.Hash{}] = make(map[string]*trienode.Node) + for i := 0; i < 3000; i++ { + var ( + path = testutil.RandBytes(32) + node = testutil.RandomNode() + ) + nodes[common.Hash{}][string(path)] = trienode.New(node.Hash, node.Blob) + } + return newDiffLayer(parent, common.Hash{}, 0, 0, nodes, nil) + } + for i := 0; i < b.N; i++ { + b.StopTimer() + var layer layer + layer = emptyLayer() + for i := 1; i < 128; i++ { + layer = fill(layer) + } + b.StartTimer() + + dl, ok := layer.(*diffLayer) + if !ok { + break + } + dl.persist(false) + } +} + +// BenchmarkJournal benchmarks the performance for journaling the layers. +// +// BenchmarkJournal +// BenchmarkJournal-8 10 110969279 ns/op +func BenchmarkJournal(b *testing.B) { + b.SkipNow() + + // First, we set up 128 diff layers, with 3K items each + fill := func(parent layer) *diffLayer { + nodes := make(map[common.Hash]map[string]*trienode.Node) + nodes[common.Hash{}] = make(map[string]*trienode.Node) + for i := 0; i < 3000; i++ { + var ( + path = testutil.RandBytes(32) + node = testutil.RandomNode() + ) + nodes[common.Hash{}][string(path)] = trienode.New(node.Hash, node.Blob) + } + // TODO(rjl493456442) a non-nil state set is expected. + return newDiffLayer(parent, common.Hash{}, 0, 0, nodes, nil) + } + var layer layer + layer = emptyLayer() + for i := 0; i < 128; i++ { + + layer = fill(layer) + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + layer.journal(new(bytes.Buffer)) + } +} diff --git a/trie/triedb/pathdb/disklayer.go b/trie/triedb/pathdb/disklayer.go new file mode 100644 index 0000000000..ba9381c932 --- /dev/null +++ b/trie/triedb/pathdb/disklayer.go @@ -0,0 +1,342 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "errors" + "fmt" + "sync" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" + "golang.org/x/crypto/sha3" +) + +// diskLayer is a low level persistent layer built on top of a key-value store. +type diskLayer struct { + root common.Hash // Immutable, root hash to which this layer was made for + id uint64 // Immutable, corresponding state id + db *Database // Path-based trie database + cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs + buffer *nodebuffer // Node buffer to aggregate writes + // A stale state means that the data or information is outdated compared to a newer version or a more recent state. + stale bool // Signals that the layer became stale (state progressed) + lock sync.RWMutex // Lock used to protect stale flag +} + +// newDiskLayer creates a new disk layer based on the passing arguments. +func newDiskLayer(root common.Hash, id uint64, db *Database, cleans *fastcache.Cache, buffer *nodebuffer) *diskLayer { + // Initialize a clean cache if the memory allowance is not zero + // or reuse the provided cache if it is not nil (inherited from + // the original disk layer). + if cleans == nil && db.config.CleanCacheSize != 0 { + cleans = fastcache.New(db.config.CleanCacheSize) + } + return &diskLayer{ + root: root, + id: id, + db: db, + cleans: cleans, + buffer: buffer, + } +} + +// root implements the layer interface, returning root hash of corresponding state. +func (dl *diskLayer) rootHash() common.Hash { + return dl.root +} + +// stateID implements the layer interface, returning the state id of disk layer. +func (dl *diskLayer) stateID() uint64 { + return dl.id +} + +// parent implements the layer interface, returning nil as there's no layer +// below the disk. +func (dl *diskLayer) parentLayer() layer { + return nil +} + +// isStale return whether this layer has become stale (was flattened across) or if +// it's still live. +func (dl *diskLayer) isStale() bool { + dl.lock.RLock() + defer dl.lock.RUnlock() + + return dl.stale +} + +// markStale sets the stale flag as true. +func (dl *diskLayer) markStale() { + dl.lock.Lock() + defer dl.lock.Unlock() + + if dl.stale { + panic("triedb disk layer is stale") // we've committed into the same base from two children, boom + } + dl.stale = true +} + +// Node implements the layer interface, retrieving the trie node with the provided node info. +// No error will be returned if the node is not found. +func (dl *diskLayer) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.stale { + return nil, errSnapshotStale + } + // Try to retrieve the trie node from the not-yet-written + // node buffer first. Note the buffer is lock free since + // it's impossible to mutate the buffer before tagging the + // layer as stale. + n, err := dl.buffer.node(owner, path, hash) + if err != nil { + return nil, err + } + if n != nil { + dirtyHitMeter.Mark(1) + dirtyReadMeter.Mark(int64(len(n.Blob))) + return n.Blob, nil + } + dirtyMissMeter.Mark(1) + + // Try to retrieve the trie node from the clean memory cache + key := cacheKey(owner, path) + if dl.cleans != nil { + if blob := dl.cleans.Get(nil, key); len(blob) > 0 { + h := newHasher() + defer h.release() + + got := h.hash(blob) + if got == hash { + cleanHitMeter.Mark(1) + cleanReadMeter.Mark(int64(len(blob))) + return blob, nil + } + cleanFalseMeter.Mark(1) + log.Error("Unexpected trie node in clean cache", "owner", owner, "path", path, "expect", hash, "got", got) + } + cleanMissMeter.Mark(1) + } + + // Try to retrieve the trie node from the disk. + var ( + nBlob []byte + nHash common.Hash + ) + if owner == (common.Hash{}) { + nBlob, nHash = rawdb.ReadAccountTrieNode(dl.db.diskdb, path) + } else { + nBlob, nHash = rawdb.ReadStorageTrieNode(dl.db.diskdb, owner, path) + } + if nHash != hash { + diskFalseMeter.Mark(1) + log.Error("Unexpected trie node in disk", "owner", owner, "path", path, "expect", hash, "got", nHash) + return nil, newUnexpectedNodeError("disk", hash, nHash, owner, path) + } + if dl.cleans != nil && len(nBlob) > 0 { + dl.cleans.Set(key, nBlob) + cleanWriteMeter.Mark(int64(len(nBlob))) + } + return nBlob, nil +} + +// update implements the layer interface, returning a new diff layer on top with the given state set. +func (dl *diskLayer) update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer { + return newDiffLayer(dl, root, id, block, nodes, states) +} + +// commit merges the given bottom-most diff layer into the node buffer +// and returns a newly constructed disk layer. Note the current disk +// layer must be tagged as stale first to prevent re-access. +func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) { + dl.lock.Lock() + defer dl.lock.Unlock() + + // Construct and store the state history first. If crash happens after storing + // the state history but without flushing the corresponding states(journal), + // the stored state history will be truncated from head in the next restart. + var ( + overflow bool + oldest uint64 + ) + if dl.db.freezer != nil { + err := writeHistory(dl.db.freezer, bottom) + if err != nil { + return nil, err + } + // Determine if the persisted history object has exceeded the configured + // limitation, set the overflow as true if so. + tail, err := dl.db.freezer.Tail() + if err != nil { + return nil, err + } + limit := dl.db.config.StateHistory + if limit != 0 && bottom.stateID()-tail > limit { + overflow = true + oldest = bottom.stateID() - limit + 1 // track the id of history **after truncation** + } + } + + // Mark the diskLayer as stale before applying any mutations on top. + dl.stale = true + + // Store the root->id lookup afterwards. All stored lookups are + // identified by the **unique** state root. It's impossible that + // in the same chain blocks are not adjacent but have the same root. + if dl.id == 0 { + rawdb.WriteStateID(dl.db.diskdb, dl.root, 0) + } + rawdb.WriteStateID(dl.db.diskdb, bottom.rootHash(), bottom.stateID()) + + // Construct a new disk layer by merging the nodes from the provided diff + // layer, and flush the content in disk layer if there are too many nodes + // cached. The clean cache is inherited from the original disk layer. + ndl := newDiskLayer(bottom.root, bottom.stateID(), dl.db, dl.cleans, dl.buffer.commit(bottom.nodes)) + + // In a unique scenario where the ID of the oldest history object (after tail + // truncation) surpasses the persisted state ID, we take the necessary action + // of forcibly committing the cached dirty nodes to ensure that the persisted + // state ID remains higher. + if !force && rawdb.ReadPersistentStateID(dl.db.diskdb) < oldest { + force = true + } + if err := ndl.buffer.flush(ndl.db.diskdb, ndl.cleans, ndl.id, force); err != nil { + return nil, err + } + // To remove outdated history objects from the end, we set the 'tail' parameter + // to 'oldest-1' due to the offset between the freezer index and the history ID. + if overflow { + pruned, err := truncateFromTail(ndl.db.diskdb, ndl.db.freezer, oldest-1) + if err != nil { + return nil, err + } + log.Debug("Pruned state history", "items", pruned, "tailid", oldest) + } + return ndl, nil +} + +// revert applies the given state history and return a reverted disk layer. +func (dl *diskLayer) revert(h *history, loader triestate.TrieLoader) (*diskLayer, error) { + if h.meta.root != dl.rootHash() { + return nil, errUnexpectedHistory + } + + // Reject if the provided state history is incomplete. It's due to + // a large construct SELF-DESTRUCT which can't be handled because + // of memory limitation. + if len(h.meta.incomplete) > 0 { + return nil, errors.New("incomplete state history") + } + if dl.id == 0 { + return nil, fmt.Errorf("%w: zero state id", errStateUnrecoverable) + } + + // Apply the reverse state changes upon the current state. This must + // be done before holding the lock in order to access state in "this" + // layer. + nodes, err := triestate.Apply(h.meta.parent, h.meta.root, h.accounts, h.storages, loader) + if err != nil { + return nil, err + } + + dl.lock.Lock() + defer dl.lock.Unlock() + // Mark the diskLayer as stale before applying any mutations on top. + dl.stale = true + + // State change may be applied to node buffer, or the persistent + // state, depends on if node buffer is empty or not. If the node + // buffer is not empty, it means that the state transition that + // needs to be reverted is not yet flushed and cached in node + // buffer, otherwise, manipulate persistent state directly. + if !dl.buffer.empty() { + err := dl.buffer.revert(dl.db.diskdb, nodes) + if err != nil { + return nil, err + } + } else { + batch := dl.db.diskdb.NewBatch() + writeNodes(batch, nodes, dl.cleans) + rawdb.WritePersistentStateID(batch, dl.id-1) + if err := batch.Write(); err != nil { + log.Crit("Failed to write states", "err", err) + } + } + return newDiskLayer(h.meta.parent, dl.id-1, dl.db, dl.cleans, dl.buffer), nil +} + +// setBufferSize sets the node buffer size to the provided value. +func (dl *diskLayer) setBufferSize(size int) error { + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.stale { + return errSnapshotStale + } + return dl.buffer.setSize(size, dl.db.diskdb, dl.cleans, dl.id) +} + +// size returns the approximate size of cached nodes in the disk layer. +func (dl *diskLayer) size() common.StorageSize { + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.stale { + return 0 + } + return common.StorageSize(dl.buffer.size) +} + +// resetCache releases the memory held by clean cache to prevent memory leak. +func (dl *diskLayer) resetCache() { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // Stale disk layer loses the ownership of clean cache. + if dl.stale { + return + } + if dl.cleans != nil { + dl.cleans.Reset() + } +} + +// hasher is used to compute the sha256 hash of the provided data. +type hasher struct{ sha crypto.KeccakState } + +var hasherPool = sync.Pool{ + New: func() interface{} { return &hasher{sha: sha3.NewLegacyKeccak256().(crypto.KeccakState)} }, +} + +func newHasher() *hasher { + return hasherPool.Get().(*hasher) +} + +func (h *hasher) hash(data []byte) common.Hash { + return crypto.HashData(h.sha, data) +} + +func (h *hasher) release() { + hasherPool.Put(h) +} diff --git a/trie/triedb/pathdb/errors.go b/trie/triedb/pathdb/errors.go new file mode 100644 index 0000000000..450cbaa4bc --- /dev/null +++ b/trie/triedb/pathdb/errors.go @@ -0,0 +1,55 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package pathdb + +import ( + "errors" + "fmt" + + "github.com/ethereum/go-ethereum/common" +) + +var ( + // errDatabaseReadOnly is returned if the database is opened in read only mode + // to prevent any mutation. + errDatabaseReadOnly = errors.New("read only") + + // errDatabaseWaitSync is returned if the initial state sync is not completed + // yet and database is disabled to prevent accessing state. + errDatabaseWaitSync = errors.New("waiting for sync") + + // errSnapshotStale is returned from data accessors if the underlying layer + // layer had been invalidated due to the chain progressing forward far enough + // to not maintain the layer's original state. + errSnapshotStale = errors.New("layer stale") + + // errUnexpectedHistory is returned if an unmatched state history is applied + // to the database for state rollback. + errUnexpectedHistory = errors.New("unexpected state history") + + // errStateUnrecoverable is returned if state is required to be reverted to + // a destination without associated state history available. + errStateUnrecoverable = errors.New("state is unrecoverable") + + // errUnexpectedNode is returned if the requested node with specified path is + // not hash matched with expectation. + errUnexpectedNode = errors.New("unexpected node") +) + +func newUnexpectedNodeError(loc string, expHash common.Hash, gotHash common.Hash, owner common.Hash, path []byte) error { + return fmt.Errorf("%w, loc: %s, node: (%x %v), %x!=%x", errUnexpectedNode, loc, owner, path, expHash, gotHash) +} diff --git a/trie/triedb/pathdb/history.go b/trie/triedb/pathdb/history.go new file mode 100644 index 0000000000..a13068fa50 --- /dev/null +++ b/trie/triedb/pathdb/history.go @@ -0,0 +1,669 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/triestate" + "golang.org/x/exp/slices" +) + +// State history records the state changes involved in executing a block. The +// state can be reverted to the previous version by applying the associated +// history object (state reverse diff). State history objects are kept to +// guarantee that the system can perform state rollbacks in case of deep reorg. +// +// Each state transition will generate a state history object. Note that not +// every block has a corresponding state history object. If a block performs +// no state changes whatsoever, no state is created for it. Each state history +// will have a sequentially increasing number acting as its unique identifier. +// +// The state history is written to disk (ancient store normally naming "state") when the corresponding +// diff layer is merged into the disk layer. At the same time, system can prune +// the oldest histories according to config. + +// +// Disk State +// ^ +// | +// +------------+ +---------+ +---------+ +---------+ +// | Init State |---->| State 1 |---->| ... |---->| State n | +// +------------+ +---------+ +---------+ +---------+ +// +// +-----------+ +------+ +-----------+ +// | History 1 |----> | ... |---->| History n | +// +-----------+ +------+ +-----------+ +// + +// # Rollback +// +// If the system wants to roll back to a previous state n, it needs to ensure +// all history objects from n+1 up to the current disk layer are existent. The +// history objects are applied to the state in reverse order, starting from the +// current disk layer. +// For example, If current state is 9 and revert 5, 6-9 need to be existent in ancient store. + +// We have 5 types ( metad, account (index +data), storage (index+data)). + +const ( + accountIndexSize = common.AddressLength + 13 // 20 + 13 The length of encoded account index + slotIndexSize = common.HashLength + 5 // 32 + 5 The length of encoded slot index + historyMetaSize = 9 + 2*common.HashLength // 9 + 32*2 The length of fixed size part of meta object + + stateHistoryVersion = uint8(0) // initial version of state history structure. +) + +// Each state history entry is consisted of five elements: +// +// # metadata +// This object contains a few meta fields, such as the associated state root, +// block number, version tag and so on. This object may contain an extra +// accountHash list which means the storage changes belong to these accounts +// are not complete due to large contract destruction. The incomplete history +// can not be used for rollback and serving archive state request + +// # account index +// This object contains some index information of account. For example, offset +// and length indicate the location of the data belonging to the account. Besides, +// storageOffset and storageSlots indicate the storage modification location +// belonging to the account. +// +// The size of each account index is *fixed*, and all indexes are sorted +// lexicographically. Thus binary search can be performed to quickly locate a +// specific account. +// +// # account data +// Account data is a concatenated byte stream composed of all account data. +// The account data can be solved by the offset and length info indicated +// by corresponding account index. It means we can use offset and length from +// account index to seek a specific account in the account data. +// +// fixed size +// ^ ^ +// / \ +// +-----------------+-----------------+----------------+-----------------+ +// | Account index 1 | Account index 2 | ... | Account index N | +// +-----------------+-----------------+----------------+-----------------+ +// | +// | length +// offset |----------------+ +// v v +// +----------------+----------------+----------------+----------------+ +// | Account data 1 | Account data 2 | ... | Account data N | +// +----------------+----------------+----------------+----------------+ +// +// # storage index +// This object is similar with account index. It's also fixed size and contains +// the location info of storage slot data. +// # storage data +// Storage data is a concatenated byte stream composed of all storage slot data. +// The storage slot data can be solved by the location info indicated by +// corresponding account index and storage slot index. +// We need to get storageOffset and storageSlots from account index to get thr offset and length of storage data from storage index. +// fixed size +// ^ ^ +// / \ +// +-----------------+-----------------+----------------+-----------------+ +// | Account index 1 | Account index 2 | ... | Account index N | +// +-----------------+-----------------+----------------+-----------------+ +// | +// | storage slots +// storage offset |-----------------------------------------------------+ +// v v +// +-----------------+-----------------+-----------------+ +// | storage index 1 | storage index 2 | storage index 3 | +// +-----------------+-----------------+-----------------+ +// | length +// offset |-------------+ +// v v +// +-------------+ +// | slot data 1 | +// + +// accountIndex describes the metadata belonging to an account. +type accountIndex struct { + address common.Address // The address of the account + length uint8 // The length of account data, size limited by 255 + offset uint32 // The offset of item in account data table + storageOffset uint32 // The offset of storage index in storage index table, belong to the account + storageSlots uint32 // The number of mutated storage slots belonging to the account +} + +// encode packs account index into byte stream. +func (i *accountIndex) encode() []byte { + var buf [accountIndexSize]byte + copy(buf[:], i.address.Bytes()) // 20 bytes + buf[common.AddressLength] = i.length // 1 byte + binary.BigEndian.PutUint32(buf[common.AddressLength+1:], i.offset) // 4 bytes + binary.BigEndian.PutUint32(buf[common.AddressLength+5:], i.storageOffset) // 4 bytes + binary.BigEndian.PutUint32(buf[common.AddressLength+9:], i.storageSlots) // 4 bytes + return buf[:] +} + +// decode unpacks account index from byte stream. +func (i *accountIndex) decode(blob []byte) { + i.address = common.BytesToAddress(blob[:common.AddressLength]) + i.length = blob[common.AddressLength] + i.offset = binary.BigEndian.Uint32(blob[common.AddressLength+1:]) + i.storageOffset = binary.BigEndian.Uint32(blob[common.AddressLength+5:]) + i.storageSlots = binary.BigEndian.Uint32(blob[common.AddressLength+9:]) +} + +// slotIndex describes the metadata belonging to a storage slot. +// Per account can have multiple storage slots. +type slotIndex struct { + hash common.Hash // The hash of slot key + length uint8 // The length of storage slot, up to 32 bytes defined in protocol + offset uint32 // The offset of item in storage slot data table +} + +// encode packs slot index into byte stream. +func (i *slotIndex) encode() []byte { + var buf [slotIndexSize]byte + copy(buf[:common.HashLength], i.hash.Bytes()) + buf[common.HashLength] = i.length + binary.BigEndian.PutUint32(buf[common.HashLength+1:], i.offset) + return buf[:] +} + +// decode unpack slot index from the byte stream. +func (i *slotIndex) decode(blob []byte) { + i.hash = common.BytesToHash(blob[:common.HashLength]) + i.length = blob[common.HashLength] + i.offset = binary.BigEndian.Uint32(blob[common.HashLength+1:]) +} + +// meta describes the meta data of state history object. +type meta struct { + version uint8 // version tag of history object + parent common.Hash // prev-state root before the state transition + root common.Hash // post-state root after the state transition + block uint64 // associated block number + incomplete []common.Address // list of address whose storage set is incomplete +} + +// encode packs the meta object into byte stream. +func (m *meta) encode() []byte { + buf := make([]byte, historyMetaSize+len(m.incomplete)*common.AddressLength) // 73 bytes + 20* current incomplete address. + buf[0] = m.version + copy(buf[1:1+common.HashLength], m.parent.Bytes()) + copy(buf[1+common.HashLength:1+2*common.HashLength], m.root.Bytes()) + binary.BigEndian.PutUint64(buf[1+2*common.HashLength:historyMetaSize], m.block) + for i, h := range m.incomplete { + copy(buf[i*common.AddressLength+historyMetaSize:], h.Bytes()) + } + return buf[:] +} + +// decode unpacks the meta object from byte stream. +func (m *meta) decode(blob []byte) error { + if len(blob) < 1 { + return fmt.Errorf("no version tag") + } + switch blob[0] { // Check the version tag + case stateHistoryVersion: + // Check base history meta size + if len(blob) < historyMetaSize { + return fmt.Errorf("invalid state history meta, len: %d", len(blob)) + } + if (len(blob)-historyMetaSize)%common.AddressLength != 0 { + return fmt.Errorf("corrupted state history meta, len: %d", len(blob)) + } + m.version = blob[0] + m.parent = common.BytesToHash(blob[1 : 1+common.HashLength]) + m.root = common.BytesToHash(blob[1+common.HashLength : 1+2*common.HashLength]) + m.block = binary.BigEndian.Uint64(blob[1+2*common.HashLength : historyMetaSize]) + for pos := historyMetaSize; pos < len(blob); { + m.incomplete = append(m.incomplete, common.BytesToAddress(blob[pos:pos+common.AddressLength])) + pos += common.AddressLength + } + return nil + default: + return fmt.Errorf("unknown version %d", blob[0]) + } +} + +// history represents a set of state changes belong to a block along with +// the metadata including the state roots involved in the state transition. +// State history objects in disk are linked with each other by a unique id +// (8-bytes integer), the oldest state history object can be pruned on demand +// in order to control the storage size. + +type history struct { + meta *meta // Meta data of history + accounts map[common.Address][]byte // Account data keyed by its address hash + accountList []common.Address // Sorted account hash list + storages map[common.Address]map[common.Hash][]byte // Storage data keyed by its address hash and slot hash + storageList map[common.Address][]common.Hash // Sorted slot hash list +} + +// newHistory constructs the state history object with provided state change set.( We need to track block and states) +func newHistory(root common.Hash, parent common.Hash, block uint64, states *triestate.Set) *history { + var ( + accountList []common.Address + storageList = make(map[common.Address][]common.Hash) + incomplete []common.Address + ) + for addr := range states.Accounts { + accountList = append(accountList, addr) + } + // Sort by comparing bytes of address + slices.SortFunc(accountList, common.Address.Cmp) + // Construct storage list + for addr, slots := range states.Storages { + slist := make([]common.Hash, 0, len(slots)) + + for slotHash := range slots { + slist = append(slist, slotHash) + } + slices.SortFunc(slist, common.Hash.Cmp) + storageList[addr] = slist + } + + for addr := range states.Incomplete { + incomplete = append(incomplete, addr) + } + slices.SortFunc(incomplete, common.Address.Cmp) + return &history{ + meta: &meta{ + version: stateHistoryVersion, + parent: parent, + root: root, + block: block, + incomplete: incomplete, + }, + accounts: states.Accounts, + accountList: accountList, + storages: states.Storages, + storageList: storageList, + } +} + +// encode serializes the currnet state history and returns four byte streams represent +// concatenated account/storage data, account/storage indexes respectively. +func (h *history) encode() ([]byte, []byte, []byte, []byte) { + var ( + slotNumber uint32 // the numbber of processed storage slots, 4 bytes + accountData []byte // the buffer for concatenated account data + storageData []byte // the buffer for concatenated storage data + accountIndexes []byte // the buffer for concatenated account indexes + storageIndexes []byte // the buffer for concatenated storage indexes + ) + + for _, addr := range h.accountList { + accIndex := accountIndex{ + address: addr, + length: uint8(len(h.accounts[addr])), // get the length of account data + offset: uint32(len(accountData)), + } + slots, exist := h.storages[addr] + if exist { + // For per account which has storage slots, we need to encode storage slots in order + for _, slotHash := range h.storageList[addr] { + sIndex := slotIndex{ + hash: slotHash, + length: uint8(len(slots[slotHash])), + offset: uint32(len(storageData)), + } + // Concat. + storageData = append(storageData, slots[slotHash]...) + storageIndexes = append(storageIndexes, sIndex.encode()...) + } + // Fill up the storage meta in account index + accIndex.storageOffset = slotNumber // 0 for the first account. + accIndex.storageSlots = uint32(len(slots)) + slotNumber += uint32(len(slots)) // collect full accounts in one state. + } + accountData = append(accountData, h.accounts[addr]...) + accountIndexes = append(accountIndexes, accIndex.encode()...) + } + return accountData, storageData, accountIndexes, storageIndexes +} + +/* decoder */ +// decoder wraps the byte streams for decoding with extra meta fields. +type decoder struct { + accountData []byte // the buffer for concatenated account data + storageData []byte // the buffer for concatenated storage data + accountIndexes []byte // the buffer for concatenated account index + storageIndexes []byte // the buffer for concatenated storage index + + lastAccount *common.Address // the address of last resolved account + lastAccountRead uint32 // the read-cursor position of account data + lastStorageSlotIndexRead uint32 // the read-cursor position of storage slot index + lastStorageSlotDataRead uint32 // the read-cursor position of storage slot data +} + +// verify validates the provided byte streams for decoding state history. A few +// checks will be performed to quickly detect data corruption. +// +// The byte stream is regarded as corrupted if: +// - account indexes buffer is empty(empty state set is invalid) +// - account indexes/storage indexer buffer is not aligned +// +// note, these situations are allowed: +// +// - empty account data: all accounts were not present +// - empty storage set: no slots are modified +func (r *decoder) verify() error { + if len(r.accountIndexes)%accountIndexSize != 0 || len(r.accountIndexes) == 0 { + return fmt.Errorf("invalid account index, len: %d", len(r.accountIndexes)) + } + if len(r.storageIndexes)%slotIndexSize != 0 { + return fmt.Errorf("invalid storage index, len: %d", len(r.storageIndexes)) + } + return nil +} + +// readAccount parses the account from the byte stream with specified position. +// It returns this account's index and data in byte stream. at this position. +func (r *decoder) readAccount(pos int) (accountIndex, []byte, error) { + // Decode account index from the index byte stream. + var index accountIndex + if (pos+1)*accountIndexSize > len(r.accountIndexes) { + return accountIndex{}, nil, errors.New("The pos is out range Indexes, seem account data buffer is corrupted") + } + // Decode account index from the index byte stream. + index.decode(r.accountIndexes[pos*accountIndexSize : (pos+1)*accountIndexSize]) + + // Perform validation before parsing account data, ensure + // - account is sorted in order in byte stream + // - account data is strictly encoded with no gap inside + // - account data is not out-of-slice + if r.lastAccount != nil { + if bytes.Compare(r.lastAccount.Bytes(), index.address.Bytes()) >= 0 { + return accountIndex{}, nil, errors.New("account is not in order") + } + } + if index.offset != r.lastAccountRead { + return accountIndex{}, nil, errors.New("account data buffer is gaped") + } + lastOffset := index.offset + uint32(index.length) + if uint32(len(r.accountData)) < lastOffset { + return accountIndex{}, nil, errors.New("account data buffer is corrupted") + } + data := r.accountData[index.offset:lastOffset] + + r.lastAccount = &index.address + r.lastAccountRead = lastOffset + return index, data, nil +} + +// readStorage parses the storage slots from the byte stream with specified account. +func (r *decoder) readStorage(accIndex accountIndex) ([]common.Hash, map[common.Hash][]byte, error) { + var ( + last common.Hash + list []common.Hash + storage = make(map[common.Hash][]byte) + ) + for j := 0; j < int(accIndex.storageSlots); j++ { + // Need to calculate the start/stop index slot j. + var ( + index slotIndex + start = (accIndex.storageOffset + uint32(j)) * uint32(slotIndexSize) + end = (accIndex.storageOffset + uint32(j+1)) * uint32(slotIndexSize) + ) + // Perform validation before parsing storage slot data, ensure + // - slot index is not out-of-slice + // - slot data is not out-of-slice + // - slot is sorted in order in byte stream + // - slot indexes is strictly encoded with no gap inside + // - slot data is strictly encoded with no gap inside + if start != r.lastStorageSlotIndexRead { + return nil, nil, errors.New("storage index buffer is gapped") + } + + if uint32(len(r.storageIndexes)) < end { + return nil, nil, errors.New("Index is out scope, storage index buffer is corrupted") + } + + // decode + index.decode(r.storageIndexes[start:end]) + + if bytes.Compare(last.Bytes(), index.hash.Bytes()) >= 0 { + return nil, nil, errors.New("storage slot is not in order") + } + if index.offset != r.lastStorageSlotDataRead { + return nil, nil, errors.New("storage data buffer is gapped") + } + sEnd := index.offset + uint32(index.length) + if uint32(len(r.storageData)) < sEnd { + return nil, nil, errors.New("storage data buffer is corrupted") + } + storage[index.hash] = r.storageData[r.lastStorageSlotDataRead:sEnd] + list = append(list, index.hash) + + last = index.hash + r.lastStorageSlotIndexRead = end + r.lastStorageSlotDataRead = sEnd + } + return list, storage, nil +} + +// decode deserializes the account and storage data from the provided byte stream. +func (h *history) decode(accountData, storageData, accountIndexes, storageIndexes []byte) error { + var ( + accounts = make(map[common.Address][]byte) + storages = make(map[common.Address]map[common.Hash][]byte) + accountList []common.Address + storageList = make(map[common.Address][]common.Hash) + + r = &decoder{ + accountData: accountData, + storageData: storageData, + accountIndexes: accountIndexes, + storageIndexes: storageIndexes, + } + ) + if err := r.verify(); err != nil { + return err + } + for i := 0; i < len(accountIndexes)/accountIndexSize; i++ { + // Resolve account first + accIndex, accData, err := r.readAccount(i) + if err != nil { + return err + } + accounts[accIndex.address] = accData + accountList = append(accountList, accIndex.address) + + // Resolve storage slots + slotList, slotData, err := r.readStorage(accIndex) + if err != nil { + return err + } + if len(slotList) > 0 { + storageList[accIndex.address] = slotList + storages[accIndex.address] = slotData + } + } + h.accounts = accounts + h.accountList = accountList + h.storages = storages + h.storageList = storageList + return nil +} + +// readHistory reads and decodes the state history object by the given id. +func readHistory(freezer *rawdb.ResettableFreezer, id uint64) (*history, error) { + blob := rawdb.ReadStateHistoryMeta(freezer, id) + if len(blob) == 0 { + return nil, fmt.Errorf("state history not found %d", id) + } + var m meta + if err := m.decode(blob); err != nil { + return nil, err + } + var ( + dec = history{meta: &m} + accountData = rawdb.ReadStateAccountHistory(freezer, id) + storageData = rawdb.ReadStateStorageHistory(freezer, id) + accountIndexes = rawdb.ReadStateAccountIndex(freezer, id) + storageIndexes = rawdb.ReadStateStorageIndex(freezer, id) + ) + if err := dec.decode(accountData, storageData, accountIndexes, storageIndexes); err != nil { + return nil, err + } + return &dec, nil +} + +// writeHistory persists the state history with the provided state set. +func writeHistory(freezer *rawdb.ResettableFreezer, dl *diffLayer) error { + // Short circuit if state set is not available. + if dl.states == nil { + return errors.New("state change set is not available") + } + var ( + start = time.Now() + history = newHistory(dl.rootHash(), dl.parentLayer().rootHash(), dl.block, dl.states) + ) + // Return byte streams of account and storage infor in current state. + accountData, storageData, accountIndex, storageIndex := history.encode() + dataSize := common.StorageSize(len(accountData) + len(storageData)) + indexSize := common.StorageSize(len(accountIndex) + len(storageIndex)) + + // Write history data into five freezer table respectively. + rawdb.WriteStateHistory(freezer, dl.stateID(), history.meta.encode(), accountIndex, storageIndex, accountData, storageData) + + historyDataBytesMeter.Mark(int64(dataSize)) + historyIndexBytesMeter.Mark(int64(indexSize)) + historyBuildTimeMeter.UpdateSince(start) + log.Debug("Stored state history", "id", dl.stateID(), "block", dl.block, "data", dataSize, "index", indexSize, "elapsed", common.PrettyDuration(time.Since(start))) + + return nil +} + +// checkHistories retrieves a batch of meta objects with the specified range +// and performs the callback on each item. +func checkHistories(freezer *rawdb.ResettableFreezer, start, count uint64, check func(*meta) error) error { + for count > 0 { + number := count + if number > 10000 { + number = 10000 // split the big read into small chunks + } + blobs, err := rawdb.ReadStateHistoryMetaList(freezer, start, number) + if err != nil { + return err + } + for _, blob := range blobs { + var dec meta + if err := dec.decode(blob); err != nil { + return err + } + if err := check(&dec); err != nil { + return err + } + } + count -= uint64(len(blobs)) + start += uint64(len(blobs)) + } + return nil +} + +// truncateFromHead removes the extra state histories from the head with the given +// parameters. It returns the number of items removed from the head. +func truncateFromHead(db ethdb.Batcher, freezer *rawdb.ResettableFreezer, nhead uint64) (int, error) { + ohead, err := freezer.Ancients() + if err != nil { + return 0, err + } + otail, err := freezer.Tail() + if err != nil { + return 0, err + } + // Ensure that the truncation target falls within the specified range. + if ohead < nhead || nhead < otail { + return 0, fmt.Errorf("out of range, tail: %d, head: %d, target: %d", otail, ohead, nhead) + } + // Short circuit if nothing to truncate. + if ohead == nhead { + return 0, nil + } + // Load the meta objects in range [nhead+1, ohead] + blobs, err := rawdb.ReadStateHistoryMetaList(freezer, nhead+1, ohead-nhead) + if err != nil { + return 0, err + } + batch := db.NewBatch() + for _, blob := range blobs { + var m meta + if err := m.decode(blob); err != nil { + return 0, err + } + rawdb.DeleteStateID(batch, m.root) + } + if err := batch.Write(); err != nil { + return 0, err + } + ohead, err = freezer.TruncateHead(nhead) + if err != nil { + return 0, err + } + return int(ohead - nhead), nil +} + +// truncateFromTail removes the extra state histories from the tail with the given +// parameters. It returns the number of items removed from the tail. +func truncateFromTail(db ethdb.Batcher, freezer *rawdb.ResettableFreezer, ntail uint64) (int, error) { + ohead, err := freezer.Ancients() + if err != nil { + return 0, err + } + otail, err := freezer.Tail() + if err != nil { + return 0, err + } + // Ensure that the truncation target falls within the specified range. + if otail > ntail || ntail > ohead { + return 0, fmt.Errorf("out of range, tail: %d, head: %d, target: %d", otail, ohead, ntail) + } + // Short circuit if nothing to truncate. + if otail == ntail { + return 0, nil + } + // Load the meta objects in range [otail+1, ntail] + blobs, err := rawdb.ReadStateHistoryMetaList(freezer, otail+1, ntail-otail) + if err != nil { + return 0, err + } + batch := db.NewBatch() + for _, blob := range blobs { + var m meta + if err := m.decode(blob); err != nil { + return 0, err + } + // Delete the state root from the state ID table + rawdb.DeleteStateID(batch, m.root) + } + if err := batch.Write(); err != nil { + return 0, err + } + otail, err = freezer.TruncateTail(ntail) + if err != nil { + return 0, err + } + return int(ntail - otail), nil +} diff --git a/trie/triedb/pathdb/history_test.go b/trie/triedb/pathdb/history_test.go new file mode 100644 index 0000000000..6828787efc --- /dev/null +++ b/trie/triedb/pathdb/history_test.go @@ -0,0 +1,356 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package pathdb + +import ( + "bytes" + "fmt" + "reflect" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/testutil" + "github.com/ethereum/go-ethereum/trie/triestate" +) + +const ( + SeedingHistory = 10 +) + +// randomStateSet generates a random state change set. +func randomStateSet(n int) *triestate.Set { + var ( + accounts = make(map[common.Address][]byte) + storages = make(map[common.Address]map[common.Hash][]byte) + ) + for i := 0; i < n; i++ { + addr := testutil.RandomAddress() + storages[addr] = make(map[common.Hash][]byte) + for j := 0; j < 3; j++ { + v, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(testutil.RandBytes(32))) + storages[addr][testutil.RandomHash()] = v + } + account := generateAccount(types.EmptyRootHash) + accounts[addr] = types.SlimAccountRLP(account) + } + return triestate.New(accounts, storages, nil) +} + +func makeHistory() *history { + return newHistory(testutil.RandomHash(), types.EmptyRootHash, 0, randomStateSet(3)) +} + +func makeHistories(n int) []*history { + var ( + parent = types.EmptyRootHash + result []*history + ) + for i := 0; i < n; i++ { + root := testutil.RandomHash() + h := newHistory(root, parent, uint64(i), randomStateSet(3)) + parent = root + result = append(result, h) + } + return result +} + +func TestEncodeDecodeHistory(t *testing.T) { + var ( + m meta + dec history + obj = makeHistory() + ) + // check if meta data can be correctly encode/decode + blob := obj.meta.encode() + if err := m.decode(blob); err != nil { + t.Fatalf("Failed to decode %v", err) + } + if !reflect.DeepEqual(&m, obj.meta) { + t.Fatal("meta is mismatched") + } + + // check if account/storage data can be correctly encode/decode + accountData, storageData, accountIndexes, storageIndexes := obj.encode() + if err := dec.decode(accountData, storageData, accountIndexes, storageIndexes); err != nil { + t.Fatalf("Failed to decode, err: %v", err) + } + if !compareSet(dec.accounts, obj.accounts) { + t.Fatal("account data is mismatched") + } + if !compareStorages(dec.storages, obj.storages) { + t.Fatal("storage data is mismatched") + } + if !compareList(dec.accountList, obj.accountList) { + t.Fatal("account list is mismatched") + } + if !compareStorageList(dec.storageList, obj.storageList) { + t.Fatal("storage list is mismatched") + } +} + +func checkHistory(t *testing.T, db ethdb.KeyValueReader, freezer *rawdb.ResettableFreezer, id uint64, root common.Hash, exist bool) { + blob := rawdb.ReadStateHistoryMeta(freezer, id) + if exist && len(blob) == 0 { + t.Fatalf("Failed to load trie history, %d", id) + } + if !exist && len(blob) != 0 { + t.Fatalf("Unexpected trie history, %d", id) + } + if exist && rawdb.ReadStateID(db, root) == nil { + t.Fatalf("Root->ID mapping is not found, %d", id) + } + if !exist && rawdb.ReadStateID(db, root) != nil { + t.Fatalf("Unexpected root->ID mapping, %d", id) + } +} + +func checkHistoriesInRange(t *testing.T, db ethdb.KeyValueReader, freezer *rawdb.ResettableFreezer, from, to uint64, roots []common.Hash, exist bool) { + for i, j := from, 0; i <= to; i, j = i+1, j+1 { + checkHistory(t, db, freezer, i, roots[j], exist) + } +} + +func TestTruncateHeadHistory(t *testing.T) { + var ( + roots []common.Hash + hs = makeHistories(SeedingHistory) + db = rawdb.NewMemoryDatabase() + freezer, _ = openFreezer(t.TempDir(), false) + ) + defer freezer.Close() + + for i := 0; i < len(hs); i++ { + accountData, storageData, accountIndex, storageIndex := hs[i].encode() + rawdb.WriteStateHistory(freezer, uint64(i+1), hs[i].meta.encode(), accountIndex, storageIndex, accountData, storageData) + rawdb.WriteStateID(db, hs[i].meta.root, uint64(i+1)) + roots = append(roots, hs[i].meta.root) + } + for size := len(hs); size > 0; size-- { + pruned, err := truncateFromHead(db, freezer, uint64(size-1)) + if err != nil { + t.Fatalf("Failed to truncate from head %v", err) + } + if pruned != 1 { + t.Error("Unexpected pruned items", "want", 1, "got", pruned) + } + checkHistoriesInRange(t, db, freezer, uint64(size), uint64(SeedingHistory), roots[size-1:], false) + checkHistoriesInRange(t, db, freezer, uint64(1), uint64(size-1), roots[:size-1], true) + } +} + +/* +Create n histories, write them to the freezer, and start truncate from the tail one by one. +*/ +func TestTruncateTailHistory(t *testing.T) { + var ( + roots []common.Hash + hs = makeHistories(SeedingHistory) + db = rawdb.NewMemoryDatabase() + freezer, err = openFreezer(t.TempDir(), false) + ) + if err != nil { + t.Fatalf("Failed to open freezer %v", err) + } + defer freezer.Close() + + for i := 0; i < len(hs); i++ { + accountData, storageData, accountIndex, storageIndex := hs[i].encode() + // append i-th history to the freezer. + rawdb.WriteStateHistory(freezer, uint64(i+1), hs[i].meta.encode(), accountIndex, storageIndex, accountData, storageData) + // Update the root->ID mapping in KeyValue database. + rawdb.WriteStateID(db, hs[i].meta.root, uint64(i+1)) + roots = append(roots, hs[i].meta.root) + } + // truncate from the tail one by one, 1, 2, 3, ..., n-1. + for newTail := 1; newTail < len(hs); newTail++ { + pruned, _ := truncateFromTail(db, freezer, uint64(newTail)) + if pruned != 1 { + t.Error("Unexpected pruned items", "want", 1, "got", pruned) + } + // Check this range should not be existed from 1 to newTail. + checkHistoriesInRange(t, db, freezer, uint64(1), uint64(newTail), roots[:newTail], false) + // Check this range should be existed from newTail+1 to SeedingHistory. + checkHistoriesInRange(t, db, freezer, uint64(newTail+1), uint64(SeedingHistory), roots[newTail:], true) + } +} + +func TestTruncateTailHistories(t *testing.T) { + var cases = []struct { + limit uint64 + expectedPruned int + maxPruned uint64 + minUnprunedOffset uint64 + empty bool + }{ + { + 1, 9, 9, 10, false, + }, + { + 0, 10, 10, 0 /* no meaning */, true, + }, + { + 10, 0, 0, 1, false, + }, + } + + for i, c := range cases { + var ( + roots []common.Hash + hs = makeHistories(SeedingHistory) + db = rawdb.NewMemoryDatabase() + freezer, err = openFreezer(t.TempDir()+fmt.Sprintf("%d", i), false) + ) + if err != nil { + t.Fatalf("Failed to open freezer %v", err) + } + defer freezer.Close() + + // Write SeedingHistory histories to the freezer. + for i := 0; i < len(hs); i++ { + accountData, storageData, accountIndex, storageIndex := hs[i].encode() + rawdb.WriteStateHistory(freezer, uint64(i+1), hs[i].meta.encode(), accountIndex, storageIndex, accountData, storageData) + rawdb.WriteStateID(db, hs[i].meta.root, uint64(i+1)) + roots = append(roots, hs[i].meta.root) + } + // Truncate from the tail, In this case, we truncate a range of histories. + tail := SeedingHistory - int(c.limit) + pruned, _ := truncateFromTail(db, freezer, uint64(tail)) + if pruned != c.expectedPruned { + t.Error("Unexpected pruned items", "want", c.expectedPruned, "got", pruned) + } + // In case of empty, jus make sure the range is truncated. + if c.empty { + checkHistoriesInRange(t, db, freezer, uint64(1), uint64(SeedingHistory), roots, false) + } else { + checkHistoriesInRange(t, db, freezer, uint64(1), c.maxPruned, roots[:tail], false) + checkHistoriesInRange(t, db, freezer, c.minUnprunedOffset, uint64(SeedingHistory), roots[tail:], true) + } + } +} + +func TestTruncateOutOfRange(t *testing.T) { + var ( + hs = makeHistories(10) + db = rawdb.NewMemoryDatabase() + freezer, _ = openFreezer(t.TempDir(), false) + ) + defer freezer.Close() + + for i := 0; i < len(hs); i++ { + accountData, storageData, accountIndex, storageIndex := hs[i].encode() + rawdb.WriteStateHistory(freezer, uint64(i+1), hs[i].meta.encode(), accountIndex, storageIndex, accountData, storageData) + rawdb.WriteStateID(db, hs[i].meta.root, uint64(i+1)) + } + truncateFromTail(db, freezer, uint64(len(hs)/2)) + + // Ensure of-out-range truncations are rejected correctly. + head, _ := freezer.Ancients() + tail, _ := freezer.Tail() + + cases := []struct { + mode int + target uint64 + expErr error + }{ + {0, head, nil}, // nothing to delete + {0, head + 1, fmt.Errorf("out of range, tail: %d, head: %d, target: %d", tail, head, head+1)}, + {0, tail - 1, fmt.Errorf("out of range, tail: %d, head: %d, target: %d", tail, head, tail-1)}, + {1, tail, nil}, // nothing to delete + {1, head + 1, fmt.Errorf("out of range, tail: %d, head: %d, target: %d", tail, head, head+1)}, + {1, tail - 1, fmt.Errorf("out of range, tail: %d, head: %d, target: %d", tail, head, tail-1)}, + } + for _, c := range cases { + var gotErr error + if c.mode == 0 { + _, gotErr = truncateFromHead(db, freezer, c.target) + } else { + _, gotErr = truncateFromTail(db, freezer, c.target) + } + if !reflect.DeepEqual(gotErr, c.expErr) { + t.Errorf("Unexpected error, want: %v, got: %v", c.expErr, gotErr) + } + } +} + +// openFreezer initializes the freezer instance for storing state histories. +func openFreezer(datadir string, readOnly bool) (*rawdb.ResettableFreezer, error) { + return rawdb.NewStateFreezer(datadir, readOnly) +} + +func compareSet[k comparable](a, b map[k][]byte) bool { + if len(a) != len(b) { + return false + } + for key, valA := range a { + valB, ok := b[key] + if !ok { + return false + } + if !bytes.Equal(valA, valB) { + return false + } + } + return true +} + +func compareList[k comparable](a, b []k) bool { + if len(a) != len(b) { + return false + } + for i := 0; i < len(a); i++ { + if a[i] != b[i] { + return false + } + } + return true +} + +func compareStorages(a, b map[common.Address]map[common.Hash][]byte) bool { + if len(a) != len(b) { + return false + } + for h, subA := range a { + subB, ok := b[h] + if !ok { + return false + } + if !compareSet(subA, subB) { + return false + } + } + return true +} + +func compareStorageList(a, b map[common.Address][]common.Hash) bool { + if len(a) != len(b) { + return false + } + for h, la := range a { + lb, ok := b[h] + if !ok { + return false + } + if !compareList(la, lb) { + return false + } + } + return true +} diff --git a/trie/triedb/pathdb/journal.go b/trie/triedb/pathdb/journal.go new file mode 100644 index 0000000000..ec7d1d6806 --- /dev/null +++ b/trie/triedb/pathdb/journal.go @@ -0,0 +1,401 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "errors" + "fmt" + "io" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" +) + +var ( + errMissJournal = errors.New("journal not found") + errMissVersion = errors.New("version not found") + errUnexpectedVersion = errors.New("unexpected journal version") + errMissDiskRoot = errors.New("disk layer root not found") + errUnmatchedJournal = errors.New("unmatched journal") +) + +const journalVersion uint64 = 0 + +// journalNode represents a trie node persisted in the journal. +type journalNode struct { + Path []byte // Path of node in the trie + Blob []byte // RLP-encoded trie node blob, nil means the node is deleted +} + +/* Single account */ + +// journalNodes represents a list trie nodes belong to a single account +// or the main account trie. +type journalNodes struct { + Owner common.Hash + Nodes []journalNode +} + +// journalStorage represents a list of storage slots belong to an account. +type journalStorage struct { + Incomplete bool + Account common.Address + Hashes []common.Hash + Slots [][]byte +} + +// journalAccounts represents a list accounts belong to the layer. +type journalAccounts struct { + Addresses []common.Address + Accounts [][]byte +} + +// loadDiskLayer reads the binary blob from the layer journal, reconstructing +// a new disk layer on it. +func (db *Database) loadDiskLayer(r *rlp.Stream) (layer, error) { + // Resolve disk layer root + var root common.Hash + + if err := r.Decode(&root); err != nil { + return nil, fmt.Errorf("load disk root: %v", err) + } + // Resolve the state id of disk layer, it can be different + // with the persistent id tracked in disk, the id distance + // is the number of transitions aggregated in disk layer. + var id uint64 + if err := r.Decode(&id); err != nil { + return nil, fmt.Errorf("load state id: %v", err) + } + // get the persistent state id from the disk + stored := rawdb.ReadPersistentStateID(db.diskdb) + if stored > id { + return nil, fmt.Errorf("invalid state id: stored %d resolved %d", stored, id) + } + // Resolve nodes cached in node buffer + var encoded []journalNodes + if err := r.Decode(&encoded); err != nil { + return nil, fmt.Errorf("load disk nodes: %v", err) + } + /* + { "ownerHash": {"path": trieNode}} + */ + nodes := make(map[common.Hash]map[string]*trienode.Node) + for _, entry := range encoded { + subset := make(map[string]*trienode.Node) + for _, n := range entry.Nodes { + isLive := (len(n.Blob) > 0) + if isLive { + subset[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob) + } else { + subset[string(n.Path)] = trienode.NewDeleted() + } + nodes[entry.Owner] = subset + } + } + // Calculate the internal state transitions by id difference. + base := newDiskLayer(root, id, db, nil, newNodeBuffer(db.bufferSize, nodes, id-stored)) + return base, nil +} + +// loadDiffLayer reads the next sections of a layer journal, reconstructing a new +// diff and verifying that it can be linked to the requested parent. +// It will get the base from diskfirstly, then load next diff layer from the former. +func (db *Database) loadDiffLayer(parent layer, r *rlp.Stream) (layer, error) { + // Read the next diff journal entry + var root common.Hash + if err := r.Decode(&root); err != nil { + // The first read may fail with EOF, marking the end of the journal + if err == io.EOF { + return parent, nil + } + return nil, fmt.Errorf("load diff root: %v", err) + } + var block uint64 + if err := r.Decode(&block); err != nil { + return nil, fmt.Errorf("load block number: %v", err) + } + // Read in-memory trie nodes from journal + var encoded []journalNodes + if err := r.Decode(&encoded); err != nil { + return nil, fmt.Errorf("load diff nodes: %v", err) + } + /* + { "ownerHash": {"path": trieNode}} + */ + nodes := make(map[common.Hash]map[string]*trienode.Node) + for _, entry := range encoded { + subset := make(map[string]*trienode.Node) + for _, n := range entry.Nodes { + isLive := (len(n.Blob) > 0) + if isLive { + subset[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob) + } else { + subset[string(n.Path)] = trienode.NewDeleted() + } + nodes[entry.Owner] = subset + } + } + + // Read state changes from journal + var ( + jaccounts journalAccounts + jstorages []journalStorage + accounts = make(map[common.Address][]byte) + storages = make(map[common.Address]map[common.Hash][]byte) + incomplete = make(map[common.Address]struct{}) + ) + // Read the account changes from the journal, changes in one layer. + if err := r.Decode(&jaccounts); err != nil { + return nil, fmt.Errorf("load diff accounts: %v", err) + } + for i, addr := range jaccounts.Addresses { + accounts[addr] = jaccounts.Accounts[i] + } + if err := r.Decode(&jstorages); err != nil { + return nil, fmt.Errorf("load diff storages: %v", err) + } + + for _, entry := range jstorages { + set := make(map[common.Hash][]byte) + for i, h := range entry.Hashes { + hasStorgeSlot := len(entry.Slots[i]) > 0 + if hasStorgeSlot { + set[h] = entry.Slots[i] + } else { + set[h] = nil + } + } + if entry.Incomplete { + incomplete[entry.Account] = struct{}{} + } + storages[entry.Account] = set + } + // Recursively load the next diff layer until reaching the end of the journal. + return db.loadDiffLayer(newDiffLayer(parent, root, parent.stateID()+1, block, nodes, triestate.New(accounts, storages, incomplete)), r) +} + +// loadJournal tries to parse the layer journal from the disk. +func (db *Database) loadJournal(diskRoot common.Hash) (layer, error) { + // Read the journal raw data from the disk + journal := rawdb.ReadTrieJournal(db.diskdb) + if len(journal) == 0 { + return nil, errMissJournal + } + // Construct a RLP stream to decode the journal with nolimit size. + r := rlp.NewStream(bytes.NewReader(journal), 0) + + // Firstly, resolve the first element as the journal version + version, err := r.Uint64() + if err != nil { + return nil, errMissVersion + } + if version != journalVersion { + return nil, fmt.Errorf("%w want %d got %d", errUnexpectedVersion, journalVersion, version) + } + // Secondly, resolve the disk layer root, ensure it's continuous + // with disk layer. Note now we can ensure it's the layer journal + // correct version, so we expect everything can be resolved properly. + var root common.Hash + if err := r.Decode(&root); err != nil { + return nil, errMissDiskRoot + } + // The journal is not matched with persistent state, discard them. + // It can happen that geth crashes without persisting the journal. + if !bytes.Equal(root.Bytes(), diskRoot.Bytes()) { + return nil, fmt.Errorf("%w want %x got %x", errUnmatchedJournal, root, diskRoot) + } + + // Load the disk layer from the journal + base, err := db.loadDiskLayer(r) + if err != nil { + return nil, err + } + // Load all the diff layers from the journal (parent, RLP stream) + head, err := db.loadDiffLayer(base, r) + if err != nil { + return nil, err + } + log.Debug("Loaded layer journal", "diskroot", diskRoot, "diffhead", head.rootHash()) + return head, nil +} + +// loadLayers loads a pre-existing state layer backed by a key-value store. +// expected head or base. +func (db *Database) loadLayers() layer { + // Retrieve the root node of persistent state. + _, root := rawdb.ReadAccountTrieNode(db.diskdb, nil) + root = types.TrieRootHash(root) + + // Load the layers by resolving the journal + head, err := db.loadJournal(root) + if err == nil { + return head + } + // journal is not matched(or missing) with the persistent state, discard + // it. Display log for discarding journal, but try to avoid showing + // useless information when the db is created from scratch. + if !(root == types.EmptyRootHash && errors.Is(err, errMissJournal)) { + log.Info("Failed to load journal, discard it", "err", err) + } + // Return single layer with persistent state. (base layer, expected all difflayers has corrupted). + return newDiskLayer(root, rawdb.ReadPersistentStateID(db.diskdb), db, nil, newNodeBuffer(db.bufferSize, nil, 0)) + +} + +// journal implements the layer interface, marshaling the un-flushed trie nodes +// along with layer meta data into provided byte buffer. +func (dl *diskLayer) journal(w io.Writer) error { + dl.lock.RLock() + defer dl.lock.RUnlock() + if dl.stale { + return errSnapshotStale + } + // Step one, write the disk root into the journal. + if err := rlp.Encode(w, dl.root); err != nil { + return err + } + // Step two, write the corresponding state id into the journal + if err := rlp.Encode(w, dl.id); err != nil { + return err + } + // Step three, write all unwritten nodes into the journal + nodes := make([]journalNodes, 0, len(dl.buffer.nodes)) + for owner, subset := range dl.buffer.nodes { + entry := journalNodes{Owner: owner} + for path, node := range subset { + entry.Nodes = append(entry.Nodes, journalNode{Path: []byte(path), Blob: node.Blob}) + } + nodes = append(nodes, entry) + } + if err := rlp.Encode(w, nodes); err != nil { + return err + } + log.Debug("Journaled pathdb disk layer", "root", dl.root, "nodes", len(dl.buffer.nodes)) + return nil +} + +// journal implements the layer interface, writing the memory layer contents +// into a buffer to be stored in the database as the layer journal. +func (dl *diffLayer) journal(w io.Writer) error { + dl.lock.RLock() + defer dl.lock.RUnlock() + // journal the parent layer first (n-1) + if err := dl.parent.journal(w); err != nil { + return err + } + // Everything below was journaled, persist this layer too ( n ) + if err := rlp.Encode(w, dl.root); err != nil { + return err + } + if err := rlp.Encode(w, dl.block); err != nil { + return err + } + // Write the accumulated trie nodes into buffer + nodes := make([]journalNodes, 0, len(dl.nodes)) + for owner, subset := range dl.nodes { + entry := journalNodes{Owner: owner} + for path, node := range subset { + entry.Nodes = append(entry.Nodes, journalNode{Path: []byte(path), Blob: node.Blob}) + } + nodes = append(nodes, entry) + } + if err := rlp.Encode(w, nodes); err != nil { + return err + } + // Write the accumulated state changes into buffer + var jacct journalAccounts + for addr, account := range dl.states.Accounts { + jacct.Addresses = append(jacct.Addresses, addr) + jacct.Accounts = append(jacct.Accounts, account) + } + if err := rlp.Encode(w, jacct); err != nil { + return err + } + storage := make([]journalStorage, 0, len(dl.states.Storages)) + for addr, slots := range dl.states.Storages { + entry := journalStorage{Account: addr} + // If the storage is incomplete, mark it as such + if _, ok := dl.states.Incomplete[addr]; ok { + entry.Incomplete = true + } + for slotHash, slot := range slots { + entry.Hashes = append(entry.Hashes, slotHash) + entry.Slots = append(entry.Slots, slot) + } + storage = append(storage, entry) + } + if err := rlp.Encode(w, storage); err != nil { + return err + } + log.Debug("Journaled pathdb diff layer", "root", dl.root, "parent", dl.parent.rootHash(), "id", dl.stateID(), "block", dl.block, "nodes", len(dl.nodes)) + return nil +} + +// Journal commits an entire diff hierarchy to disk into a single journal entry. +// This is meant to be used during shutdown to persist the layer without +// flattening everything down (bad for reorgs). And this function will mark the +// database as read-only to prevent all following mutation to disk. +func (db *Database) Journal(root common.Hash) error { + // Retrieve the head layer to journal from. + l := db.tree.get(root) + if l == nil { + return fmt.Errorf("triedb layer [%#x] missing", root) + } + // Run the journaling + db.lock.Lock() + defer db.lock.Unlock() + + // Short circuit if the database is in read only mode. + if db.readOnly { + return errDatabaseReadOnly + } + // Firstly write out the metadata of journal + journal := new(bytes.Buffer) + // Write the journal version to journal []bytes + if err := rlp.Encode(journal, journalVersion); err != nil { + return err + } + + // The stored state in disk might be empty, convert the + // root to emptyRoot in this case. + _, diskroot := rawdb.ReadAccountTrieNode(db.diskdb, nil) + diskroot = types.TrieRootHash(diskroot) + // Secondly write out the state root in disk, ensure all layers + // on top are continuous with disk. + if err := rlp.Encode(journal, diskroot); err != nil { + return err + } + + // Finally write out the journal of each layer in reverse order. (Resursive journal) + if err := l.journal(journal); err != nil { + return err + } + // Store the journal into the database and return + rawdb.WriteTrieJournal(db.diskdb, journal.Bytes()) + + // Set the db in read only mode to reject all following mutations to disk. + db.readOnly = true + log.Info("Stored journal in triedb, db is Readonly mode now.", "disk", diskroot, "size", common.StorageSize(journal.Len())) + return nil +} diff --git a/trie/triedb/pathdb/layertree.go b/trie/triedb/pathdb/layertree.go new file mode 100644 index 0000000000..9352fa3f89 --- /dev/null +++ b/trie/triedb/pathdb/layertree.go @@ -0,0 +1,214 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "errors" + "fmt" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" +) + +// layerTree is a group of state layers identified by the state root. +// This structure defines a few basic operations for manipulating +// state layers linked with each other in a tree structure. It's +// thread-safe to use. However, callers need to ensure the thread-safety +// of the referenced layer by themselves. +type layerTree struct { + lock sync.RWMutex + layers map[common.Hash]layer +} + +// newLayerTree constructs the layerTree with the given head layer. +func newLayerTree(head layer) *layerTree { + tree := new(layerTree) + tree.reset(head) + return tree +} + +// reset initializes the layerTree by the given head layer. +// All the ancestors will be iterated out and linked in the tree. +func (tree *layerTree) reset(head layer) { + tree.lock.Lock() + defer tree.lock.Unlock() + + var layers = make(map[common.Hash]layer) + for head != nil { + layers[head.rootHash()] = head + head = head.parentLayer() + } + tree.layers = layers +} + +// get retrieves a layer belonging to the given state root. +func (tree *layerTree) get(root common.Hash) layer { + tree.lock.RLock() + defer tree.lock.RUnlock() + + return tree.layers[types.TrieRootHash(root)] +} + +// forEach iterates the stored layers inside and applies the +// given callback on them. +func (tree *layerTree) forEach(onLayer func(layer)) { + tree.lock.RLock() + defer tree.lock.RUnlock() + + for _, layer := range tree.layers { + onLayer(layer) + } +} + +// len returns the number of layers cached. +func (tree *layerTree) len() int { + tree.lock.RLock() + defer tree.lock.RUnlock() + + return len(tree.layers) +} + +// add inserts a new layer into the tree if it can be linked to an existing old parent. +func (tree *layerTree) add(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { + // Reject noop updates to avoid self-loops. This is a special case that can + // happen for clique networks and proof-of-stake networks where empty blocks + // don't modify the state (0 block subsidy). + // + // Although we could silently ignore this internally, it should be the caller's + // responsibility to avoid even attempting to insert such a layer. + root, parentRoot = types.TrieRootHash(root), types.TrieRootHash(parentRoot) + if root == parentRoot { + return errors.New("layer cycle") + } + parent := tree.get(parentRoot) + if parent == nil { + return fmt.Errorf("triedb parent [%#x] layer missing", parentRoot) + } + l := parent.update(root, parent.stateID()+1, block, nodes.Flatten(), states) + + tree.lock.Lock() + tree.layers[l.rootHash()] = l + tree.lock.Unlock() + return nil +} + +// cap traverses downwards the diff tree until the number of allowed diff layers +// are crossed. All diffs beyond the permitted number are flattened downwards. +func (tree *layerTree) cap(root common.Hash, layers int) error { + // Retrieve the head layer to cap from + root = types.TrieRootHash(root) + l := tree.get(root) + if l == nil { + return fmt.Errorf("triedb layer [%#x] missing", root) + } + diff, ok := l.(*diffLayer) + if !ok { + return fmt.Errorf("triedb layer [%#x] is disk layer", root) + } + tree.lock.Lock() + defer tree.lock.Unlock() + + // If full commit was requested, flatten the diffs and merge onto disk + if layers == 0 { + base, err := diff.persist(true) + if err != nil { + return err + } + // Replace the entire layer tree with the flat base + tree.layers = map[common.Hash]layer{base.rootHash(): base} + return nil + } + // Dive until we run out of layers or reach the persistent database + for i := 0; i < layers-1; i++ { + // If we still have diff layers below, continue down + if parent, ok := diff.parentLayer().(*diffLayer); ok { + diff = parent + } else { + // Diff stack too shallow, return without modifications + return nil + } + } + // We're out of layers, flatten anything below, stopping if it's the disk or if + // the memory limit is not yet exceeded. + switch parent := diff.parentLayer().(type) { + case *diskLayer: + return nil + + case *diffLayer: + // Hold the lock to prevent any read operations until the new + // parent is linked correctly. + diff.lock.Lock() + + base, err := parent.persist(false) + if err != nil { + diff.lock.Unlock() + return err + } + tree.layers[base.rootHash()] = base + diff.parent = base + + diff.lock.Unlock() + + default: + panic(fmt.Sprintf("unknown data layer in triedb: %T", parent)) + } + // Remove any layer that is stale or links into a stale layer + children := make(map[common.Hash][]common.Hash) + for root, layer := range tree.layers { + if dl, ok := layer.(*diffLayer); ok { + parent := dl.parentLayer().rootHash() + children[parent] = append(children[parent], root) + } + } + var remove func(root common.Hash) + remove = func(root common.Hash) { + delete(tree.layers, root) + for _, child := range children[root] { + remove(child) + } + delete(children, root) + } + for root, layer := range tree.layers { + if dl, ok := layer.(*diskLayer); ok && dl.isStale() { + remove(root) + } + } + return nil +} + +// bottom returns the bottom-most disk layer in this tree. +func (tree *layerTree) bottom() *diskLayer { + tree.lock.RLock() + defer tree.lock.RUnlock() + + if len(tree.layers) == 0 { + return nil // Shouldn't happen, empty tree + } + // pick a random one as the entry point + var current layer + for _, layer := range tree.layers { + current = layer + break + } + for current.parentLayer() != nil { + current = current.parentLayer() + } + return current.(*diskLayer) +} diff --git a/trie/triedb/pathdb/metrics.go b/trie/triedb/pathdb/metrics.go new file mode 100644 index 0000000000..9e2b1dcbf5 --- /dev/null +++ b/trie/triedb/pathdb/metrics.go @@ -0,0 +1,50 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package pathdb + +import "github.com/ethereum/go-ethereum/metrics" + +var ( + cleanHitMeter = metrics.NewRegisteredMeter("pathdb/clean/hit", nil) + cleanMissMeter = metrics.NewRegisteredMeter("pathdb/clean/miss", nil) + cleanReadMeter = metrics.NewRegisteredMeter("pathdb/clean/read", nil) + cleanWriteMeter = metrics.NewRegisteredMeter("pathdb/clean/write", nil) + + dirtyHitMeter = metrics.NewRegisteredMeter("pathdb/dirty/hit", nil) + dirtyMissMeter = metrics.NewRegisteredMeter("pathdb/dirty/miss", nil) + dirtyReadMeter = metrics.NewRegisteredMeter("pathdb/dirty/read", nil) + dirtyWriteMeter = metrics.NewRegisteredMeter("pathdb/dirty/write", nil) + dirtyNodeHitDepthHist = metrics.NewRegisteredHistogram("pathdb/dirty/depth", nil, metrics.NewExpDecaySample(1028, 0.015)) + + cleanFalseMeter = metrics.NewRegisteredMeter("pathdb/clean/false", nil) + dirtyFalseMeter = metrics.NewRegisteredMeter("pathdb/dirty/false", nil) + diskFalseMeter = metrics.NewRegisteredMeter("pathdb/disk/false", nil) + + commitTimeTimer = metrics.NewRegisteredTimer("pathdb/commit/time", nil) + commitNodesMeter = metrics.NewRegisteredMeter("pathdb/commit/nodes", nil) + commitBytesMeter = metrics.NewRegisteredMeter("pathdb/commit/bytes", nil) + + gcNodesMeter = metrics.NewRegisteredMeter("pathdb/gc/nodes", nil) + gcBytesMeter = metrics.NewRegisteredMeter("pathdb/gc/bytes", nil) + + diffLayerBytesMeter = metrics.NewRegisteredMeter("pathdb/diff/bytes", nil) + diffLayerNodesMeter = metrics.NewRegisteredMeter("pathdb/diff/nodes", nil) + + historyBuildTimeMeter = metrics.NewRegisteredTimer("pathdb/history/time", nil) + historyDataBytesMeter = metrics.NewRegisteredMeter("pathdb/history/bytes/data", nil) + historyIndexBytesMeter = metrics.NewRegisteredMeter("pathdb/history/bytes/index", nil) +) diff --git a/trie/triedb/pathdb/nodebuffer.go b/trie/triedb/pathdb/nodebuffer.go new file mode 100644 index 0000000000..0a334baba2 --- /dev/null +++ b/trie/triedb/pathdb/nodebuffer.go @@ -0,0 +1,289 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "fmt" + "time" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +// nodebuffer is a collection of modified trie nodes to aggregate the disk write. +// The content of the nodebuffer must be checked before diving into disk (since it basically is not-yet-written data). +// +// nodebuffer serves as an intermediate layer for storing changes before flushing to disk, +// making it easier for the diskLayer to manage the changes and construct new diskLayers when flushing. +type nodebuffer struct { + layers uint64 // The number of diff layers aggregated inside + size uint64 // The size of aggregated writes + limit uint64 // The maximum memory allowance in bytes + nodes map[common.Hash]map[string]*trienode.Node // The dirty node set, mapped by owner and path +} + +// newNodeBuffer initializes the node buffer with the provided nodes. +func newNodeBuffer(limit int, nodes map[common.Hash]map[string]*trienode.Node, layers uint64) *nodebuffer { + if nodes == nil { + nodes = make(map[common.Hash]map[string]*trienode.Node) + } + var size uint64 + for _, subset := range nodes { + for path, n := range subset { + size += uint64(len(n.Blob) + len(path)) + } + } + return &nodebuffer{ + layers: layers, + nodes: nodes, + size: size, + limit: uint64(limit), + } +} + +// node retrieves the trie node with given node info. +func (b *nodebuffer) node(owner common.Hash, path []byte, hash common.Hash) (*trienode.Node, error) { + subset, ok := b.nodes[owner] + if !ok { + return nil, nil + } + n, ok := subset[string(path)] + if !ok { + return nil, nil + } + if n.Hash != hash { + dirtyFalseMeter.Mark(1) + log.Error("Unexpected trie node in node buffer", "owner", owner, "path", path, "expect", hash, "got", n.Hash) + return nil, newUnexpectedNodeError("dirty", hash, n.Hash, owner, path) + } + return n, nil +} + +// commit merges the dirty nodes into the nodebuffer. This operation won't take +// the ownership of the nodes map which belongs to the bottom-most diff layer. +// It will just hold the node references from the given map which are safe to copy. +func (b *nodebuffer) commit(nodes map[common.Hash]map[string]*trienode.Node) *nodebuffer { + var ( + delta int64 + overwrite int64 + overwriteSize int64 + ) + for owner, subset := range nodes { + current, exist := b.nodes[owner] + if !exist { + // Allocate a new map for the subset instead of claiming it directly + // from the passed map to avoid potential concurrent map read/write. + // The nodes belong to original diff layer are still accessible even + // after merging, thus the ownership of nodes map should still belong + // to original layer and any mutation on it should be prevented. + current = make(map[string]*trienode.Node) + for path, n := range subset { + current[path] = n + delta += int64(len(n.Blob) + len(path)) + } + b.nodes[owner] = current + continue + } + for path, n := range subset { + if orig, exist := current[path]; !exist { + delta += int64(len(n.Blob) + len(path)) + } else { + delta += int64(len(n.Blob) - len(orig.Blob)) + overwrite++ + overwriteSize += int64(len(orig.Blob) + len(path)) + } + current[path] = n + } + b.nodes[owner] = current + } + b.updateSize(delta) + b.layers++ + gcNodesMeter.Mark(overwrite) + gcBytesMeter.Mark(overwriteSize) + return b +} + +// revert is the reverse operation of commit. It also merges the provided nodes +// into the nodebuffer, the difference is that the provided node set should +// revert the changes made by the last state transition. +func (b *nodebuffer) revert(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node) error { + // Short circuit if no embedded state transition to revert. + if b.layers == 0 { + return errStateUnrecoverable + } + b.layers-- + + // Reset the entire buffer if only a single transition left. + if b.layers == 0 { + b.reset() + return nil + } + var delta int64 + for owner, subset := range nodes { + current, ok := b.nodes[owner] + if !ok { + panic(fmt.Sprintf("non-existent subset (%x)", owner)) + } + for path, n := range subset { + orig, ok := current[path] + if !ok { + // There is a special case in MPT that one child is removed from + // a fullNode which only has two children, and then a new child + // with different position is immediately inserted into the fullNode. + // In this case, the clean child of the fullNode will also be + // marked as dirty because of node collapse and expansion. + // + // In case of database rollback, don't panic if this "clean" + // node occurs which is not present in buffer. + var nhash common.Hash + if owner == (common.Hash{}) { + _, nhash = rawdb.ReadAccountTrieNode(db, []byte(path)) + } else { + _, nhash = rawdb.ReadStorageTrieNode(db, owner, []byte(path)) + } + // Ignore the clean node in the case described above. + if nhash == n.Hash { + continue + } + panic(fmt.Sprintf("non-existent node (%x %v) blob: %v", owner, path, crypto.Keccak256Hash(n.Blob).Hex())) + } + current[path] = n + delta += int64(len(n.Blob)) - int64(len(orig.Blob)) + } + } + b.updateSize(delta) + return nil +} + +// updateSize updates the total cache size by the given delta. +func (b *nodebuffer) updateSize(delta int64) { + size := int64(b.size) + delta + if size >= 0 { + b.size = uint64(size) + return + } + s := b.size + b.size = 0 + log.Error("Invalid pathdb buffer size", "prev", common.StorageSize(s), "delta", common.StorageSize(delta)) +} + +// reset cleans up the disk cache. +func (b *nodebuffer) reset() { + b.layers = 0 + b.size = 0 + b.nodes = make(map[common.Hash]map[string]*trienode.Node) +} + +// empty returns an indicator if nodebuffer contains any state transition inside. +func (b *nodebuffer) empty() bool { + return b.layers == 0 +} + +// setSize sets the buffer size to the provided number, and invokes a flush +// operation if the current memory usage exceeds the new limit. +func (b *nodebuffer) setSize(size int, db ethdb.KeyValueStore, clean *fastcache.Cache, id uint64) error { + b.limit = uint64(size) + return b.flush(db, clean, id, false) +} + +// allocBatch returns a database batch with pre-allocated buffer. +func (b *nodebuffer) allocBatch(db ethdb.KeyValueStore) ethdb.Batch { + var metasize int + for owner, nodes := range b.nodes { + if owner == (common.Hash{}) { + metasize += len(nodes) * len(rawdb.TrieNodeAccountPrefix) // database key prefix + } else { + metasize += len(nodes) * (len(rawdb.TrieNodeStoragePrefix) + common.HashLength) // database key prefix + owner + } + } + return db.NewBatchWithSize((metasize + int(b.size)) * 11 / 10) // extra 10% for potential pebble internal stuff +} + +// flush persists the in-memory dirty trie node into the disk if the configured +// memory threshold is reached. Note, all data must be written atomically. +func (b *nodebuffer) flush(db ethdb.KeyValueStore, clean *fastcache.Cache, id uint64, force bool) error { + if b.size <= b.limit && !force { + return nil + } + // Ensure the target state id is aligned with the internal counter. + head := rawdb.ReadPersistentStateID(db) + if head+b.layers != id { + return fmt.Errorf("buffer layers (%d) cannot be applied on top of persisted state id (%d) to reach requested state id (%d)", b.layers, head, id) + } + var ( + start = time.Now() + batch = b.allocBatch(db) + ) + nodes := writeNodes(batch, b.nodes, clean) + rawdb.WritePersistentStateID(batch, id) + + // Flush all mutations in a single batch + size := batch.ValueSize() + if err := batch.Write(); err != nil { + return err + } + commitBytesMeter.Mark(int64(size)) + commitNodesMeter.Mark(int64(nodes)) + commitTimeTimer.UpdateSince(start) + log.Debug("Persisted pathdb nodes", "nodes", len(b.nodes), "bytes", common.StorageSize(size), "elapsed", common.PrettyDuration(time.Since(start))) + b.reset() + return nil +} + +// writeNodes writes the trie nodes into the provided database batch. +// Note this function will also inject all the newly written nodes +// into clean cache. +func writeNodes(batch ethdb.Batch, nodes map[common.Hash]map[string]*trienode.Node, clean *fastcache.Cache) (total int) { + for owner, subset := range nodes { + for path, n := range subset { + if n.IsDeleted() { + if owner == (common.Hash{}) { + rawdb.DeleteAccountTrieNode(batch, []byte(path)) + } else { + rawdb.DeleteStorageTrieNode(batch, owner, []byte(path)) + } + if clean != nil { + clean.Del(cacheKey(owner, []byte(path))) + } + } else { + if owner == (common.Hash{}) { + rawdb.WriteAccountTrieNode(batch, []byte(path), n.Blob) + } else { + rawdb.WriteStorageTrieNode(batch, owner, []byte(path), n.Blob) + } + if clean != nil { + clean.Set(cacheKey(owner, []byte(path)), n.Blob) + } + } + } + total += len(subset) + } + return total +} + +// cacheKey constructs the unique key of clean cache. +func cacheKey(owner common.Hash, path []byte) []byte { + if owner == (common.Hash{}) { + return path + } + return append(owner.Bytes(), path...) +} diff --git a/trie/triedb/pathdb/testutils.go b/trie/triedb/pathdb/testutils.go new file mode 100644 index 0000000000..674f52fd16 --- /dev/null +++ b/trie/triedb/pathdb/testutils.go @@ -0,0 +1,157 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "fmt" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" + "golang.org/x/exp/slices" +) + +// testHasher is a test utility for computing root hash of a batch of state +// elements. The hash algorithm is to sort all the elements in lexicographical +// order, concat the key and value in turn, and perform hash calculation on +// the concatenated bytes. Except the root hash, a nodeset will be returned +// once Commit is called, which contains all the changes made to hasher. +type testHasher struct { + owner common.Hash // owner identifier + root common.Hash // original root + dirties map[common.Hash][]byte // dirty states + cleans map[common.Hash][]byte // clean states +} + +// newTestHasher constructs a hasher object with provided states. +func newTestHasher(owner common.Hash, root common.Hash, cleans map[common.Hash][]byte) (*testHasher, error) { + if cleans == nil { + cleans = make(map[common.Hash][]byte) + } + if got, _ := hash(cleans); got != root { + return nil, fmt.Errorf("state root mismatched, want: %x, got: %x", root, got) + } + return &testHasher{ + owner: owner, + root: root, + dirties: make(map[common.Hash][]byte), + cleans: cleans, + }, nil +} + +// Get returns the value for key stored in the trie. +func (h *testHasher) TryGet(key []byte) ([]byte, error) { + hash := common.BytesToHash(key) + val, ok := h.dirties[hash] + if ok { + return val, nil + } + return h.cleans[hash], nil +} + +// Update associates key with value in the trie. +func (h *testHasher) TryUpdate(key, value []byte) error { + h.dirties[common.BytesToHash(key)] = common.CopyBytes(value) + return nil +} + +// Delete removes any existing value for key from the trie. +func (h *testHasher) TryDelete(key []byte) error { + h.dirties[common.BytesToHash(key)] = nil + return nil +} + +// Commit computes the new hash of the states and returns the set with all +// state changes. +func (h *testHasher) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet, error) { + var ( + nodes = make(map[common.Hash][]byte) + set = trienode.NewNodeSet(h.owner) + ) + for hash, val := range h.cleans { + nodes[hash] = val + } + for hash, val := range h.dirties { + nodes[hash] = val + if bytes.Equal(val, h.cleans[hash]) { + continue + } + if len(val) == 0 { + set.AddNode(hash.Bytes(), trienode.NewDeleted()) + } else { + set.AddNode(hash.Bytes(), trienode.New(crypto.Keccak256Hash(val), val)) + } + } + root, blob := hash(nodes) + + // Include the dirty root node as well. + if root != types.EmptyRootHash && root != h.root { + set.AddNode(nil, trienode.New(root, blob)) + } + if root == types.EmptyRootHash && h.root != types.EmptyRootHash { + set.AddNode(nil, trienode.NewDeleted()) + } + return root, set, nil +} + +// hash performs the hash computation upon the provided states. +func hash(states map[common.Hash][]byte) (common.Hash, []byte) { + var hs []common.Hash + for hash := range states { + hs = append(hs, hash) + } + // Sort hashes. + slices.SortFunc(hs, common.Hash.Cmp) + + var input []byte + for _, hash := range hs { + if len(states[hash]) == 0 { + continue + } + input = append(input, hash.Bytes()...) + input = append(input, states[hash]...) + } + if len(input) == 0 { + return types.EmptyRootHash, nil + } + return crypto.Keccak256Hash(input), input +} + +type hashLoader struct { + accounts map[common.Hash][]byte + storages map[common.Hash]map[common.Hash][]byte +} + +func newHashLoader(accounts map[common.Hash][]byte, storages map[common.Hash]map[common.Hash][]byte) *hashLoader { + return &hashLoader{ + accounts: accounts, + storages: storages, + } +} + +// OpenTrie opens the main account trie. +func (l *hashLoader) OpenTrie(root common.Hash) (triestate.Trie, error) { + return newTestHasher(common.Hash{}, root, l.accounts) +} + +// OpenStorageTrie opens the storage trie of an account. +func (l *hashLoader) OpenStorageTrie(stateRoot common.Hash, addrHash, root common.Hash) (triestate.Trie, error) { + return newTestHasher(addrHash, root, l.storages[addrHash]) +} diff --git a/trie/trienode/node.go b/trie/trienode/node.go new file mode 100644 index 0000000000..bffa5a90f1 --- /dev/null +++ b/trie/trienode/node.go @@ -0,0 +1,198 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package trienode + +import ( + "fmt" + "sort" + "strings" + + "github.com/ethereum/go-ethereum/common" +) + +// Node is a wrapper which contains the encoded blob of the trie node and its +// unique hash identifier. It is general enough that can be used to represent +// trie nodes corresponding to different trie implementations. +type Node struct { + Hash common.Hash // Node hash, empty for deleted node + Blob []byte // Encoded node blob, nil for the deleted node +} + +// Size returns the total memory size used by this node. +func (n *Node) Size() int { + return len(n.Blob) + common.HashLength +} + +// IsDeleted returns the indicator if the node is marked as deleted. +func (n *Node) IsDeleted() bool { + return n.Hash == (common.Hash{}) +} + +// New constructs a node with provided node information. +func New(hash common.Hash, blob []byte) *Node { + return &Node{Hash: hash, Blob: blob} +} + +// NewDeleted constructs a node which is deleted. +func NewDeleted() *Node { return New(common.Hash{}, nil) } + +// leaf represents a trie leaf node +type leaf struct { + Blob []byte // raw blob of leaf + Parent common.Hash // the hash of parent node +} + +// NodeSet contains a set of nodes collected during the commit operation. +// Each node is keyed by path. It's not thread-safe to use. +type NodeSet struct { + Owner common.Hash + Leaves []*leaf + Nodes map[string]*Node + updates int // the count of updated and inserted nodes + deletes int // the count of deleted nodes +} + +// NewNodeSet initializes a node set. The owner is zero for the account trie and +// the owning account address hash for storage tries. +func NewNodeSet(owner common.Hash) *NodeSet { + return &NodeSet{ + Owner: owner, + Nodes: make(map[string]*Node), + } +} + +// ForEachWithOrder iterates the nodes with the order from bottom to top, +// right to left, nodes with the longest path will be iterated first. +func (set *NodeSet) ForEachWithOrder(callback func(path string, n *Node)) { + var paths sort.StringSlice + for path := range set.Nodes { + paths = append(paths, path) + } + // Bottom-up, longest path first + sort.Sort(sort.Reverse(paths)) + for _, path := range paths { + callback(path, set.Nodes[path]) + } +} + +// AddNode adds the provided node into set. +func (set *NodeSet) AddNode(path []byte, n *Node) { + if n.IsDeleted() { + set.deletes += 1 + } else { + set.updates += 1 + } + set.Nodes[string(path)] = n +} + +// Merge adds a set of nodes into the set. +func (set *NodeSet) Merge(owner common.Hash, nodes map[string]*Node) error { + if set.Owner != owner { + return fmt.Errorf("nodesets belong to different owner are not mergeable %x-%x", set.Owner, owner) + } + for path, node := range nodes { + prev, ok := set.Nodes[path] + if ok { + // overwrite happens, revoke the counter + if prev.IsDeleted() { + set.deletes -= 1 + } else { + set.updates -= 1 + } + } + set.AddNode([]byte(path), node) + } + return nil +} + +// AddLeaf adds the provided leaf node into set. +func (set *NodeSet) AddLeaf(parent common.Hash, blob []byte) { + set.Leaves = append(set.Leaves, &leaf{Blob: blob, Parent: parent}) +} + +// Size returns the number of dirty nodes in set. +func (set *NodeSet) Size() (int, int) { + return set.updates, set.deletes +} + +// Hashes returns the hashes of all updated nodes. +func (set *NodeSet) Hashes() []common.Hash { + var ret []common.Hash + for _, node := range set.Nodes { + ret = append(ret, node.Hash) + } + return ret +} + +// Summary returns a string-representation of the NodeSet. +func (set *NodeSet) Summary() string { + var out = new(strings.Builder) + fmt.Fprintf(out, "nodeset owner: %v\n", set.Owner) + if set.Nodes != nil { + for path, n := range set.Nodes { + // Deletion + + if n.IsDeleted() { + fmt.Fprintf(out, " [-]: %x\n", path) + continue + } + // Insertion or update + fmt.Fprintf(out, " [+/*]: %x -> %v \n", path, n.Hash) + } + } + for _, n := range set.Leaves { + fmt.Fprintf(out, "[leaf]: %v\n", n) + } + return out.String() +} + +// MergedNodeSet represents a merged node set for a group of tries. +type MergedNodeSet struct { + Sets map[common.Hash]*NodeSet +} + +// NewMergedNodeSet initializes an empty merged set. +func NewMergedNodeSet() *MergedNodeSet { + return &MergedNodeSet{Sets: make(map[common.Hash]*NodeSet)} +} + +// NewWithNodeSet constructs a merged nodeset with the provided single set. +func NewWithNodeSet(set *NodeSet) *MergedNodeSet { + merged := NewMergedNodeSet() + merged.Merge(set) + return merged +} + +// Merge merges the provided dirty nodes of a trie into the set. The assumption +// is held that no duplicated set belonging to the same trie will be merged twice. +func (set *MergedNodeSet) Merge(other *NodeSet) error { + subset, present := set.Sets[other.Owner] + if present { + return subset.Merge(other.Owner, other.Nodes) + } + set.Sets[other.Owner] = other + return nil +} + +// Flatten returns a two-dimensional map for internal nodes. +func (set *MergedNodeSet) Flatten() map[common.Hash]map[string]*Node { + nodes := make(map[common.Hash]map[string]*Node) + for owner, set := range set.Sets { + nodes[owner] = set.Nodes + } + return nodes +} diff --git a/trie/triestate/state.go b/trie/triestate/state.go new file mode 100644 index 0000000000..2b2f3720d9 --- /dev/null +++ b/trie/triestate/state.go @@ -0,0 +1,278 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package triestate + +import ( + "errors" + "fmt" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" + "golang.org/x/crypto/sha3" +) + +// Trie is an Ethereum state trie, can be implemented by Ethereum Merkle Patricia + +// tree or Verkle tree. +type Trie interface { + // Get returns the value for key stored in the trie. + TryGet(key []byte) ([]byte, error) + + // Update associates key with value in the trie. + TryUpdate(key, value []byte) error + + // Delete removes any existing value for key from the trie. + TryDelete(key []byte) error + + // Commit the trie and returns a set of dirty nodes generated along with + // the new root hash. + Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet, error) +} + +// TrieLoader wraps functions to load tries. +type TrieLoader interface { + // OpenTrie opens the main account trie. + OpenTrie(root common.Hash) (Trie, error) + + // OpenStorageTrie opens the storage trie of an account. + OpenStorageTrie(stateRoot common.Hash, addrHash, root common.Hash) (Trie, error) +} + +// Set represents a collection of mutated states during a state transition. +// The value refers to the original content of state before the transition +// is made. Nil means that the state was not present previously. +type Set struct { + Accounts map[common.Address][]byte // Mutated account set, nil means the account was not present + Storages map[common.Address]map[common.Hash][]byte // Mutated storage set, nil means the slot was not present + Incomplete map[common.Address]struct{} // Indicator whether the storage slot is incomplete due to large deletion + size common.StorageSize // Approximate size of set +} + +// New constructs the state set with provided data. +func New(accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte, incomplete map[common.Address]struct{}) *Set { + + return &Set{ + Accounts: accounts, + Storages: storages, + Incomplete: incomplete, + } +} + +// Size returns the approximate memory size occupied by the set. +func (s *Set) Size() common.StorageSize { + if s.size != 0 { + return s.size + } + for _, account := range s.Accounts { + s.size += common.StorageSize(common.AddressLength + len(account)) + } + for _, slots := range s.Storages { + for _, val := range slots { + s.size += common.StorageSize(common.HashLength + len(val)) + } + s.size += common.StorageSize(common.AddressLength) + } + s.size += common.StorageSize(common.AddressLength * len(s.Incomplete)) + return s.size +} + +// context wraps all fields for executing state diffs. +type context struct { + prevRoot common.Hash + postRoot common.Hash + accounts map[common.Address][]byte + storages map[common.Address]map[common.Hash][]byte + accountTrie Trie + nodes *trienode.MergedNodeSet +} + +// Apply traverses the provided state diffs, apply them in the associated +// post-state and return the generated dirty trie nodes. The state can be +// loaded via the provided trie loader. +func Apply(prevRoot common.Hash, postRoot common.Hash, accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte, loader TrieLoader) (map[common.Hash]map[string]*trienode.Node, error) { + tr, err := loader.OpenTrie(postRoot) + if err != nil { + return nil, err + } + ctx := &context{ + prevRoot: prevRoot, + postRoot: postRoot, + accounts: accounts, + storages: storages, + accountTrie: tr, + nodes: trienode.NewMergedNodeSet(), + } + for addr, account := range accounts { + var err error + if len(account) == 0 { + err = deleteAccount(ctx, loader, addr) + } else { + err = updateAccount(ctx, loader, addr) + } + if err != nil { + return nil, fmt.Errorf("failed to revert state, err: %w", err) + } + } + root, result, err := tr.Commit(false) + if err != nil { + return nil, err + } + if root != prevRoot { + return nil, fmt.Errorf("failed to revert state, want %#x, got %#x", prevRoot, root) + } + if err := ctx.nodes.Merge(result); err != nil { + return nil, err + } + return ctx.nodes.Flatten(), nil +} + +// updateAccount the account was present in prev-state, and may or may not +// existent in post-state. Apply the reverse diff and verify if the storage +// root matches the one in prev-state account. +func updateAccount(ctx *context, loader TrieLoader, addr common.Address) error { + // The account was present in prev-state, decode it from the + // 'slim-rlp' format bytes. + h := newHasher() + defer h.release() + + addrHash := h.hash(addr.Bytes()) + prev, err := types.FullAccount(ctx.accounts[addr]) + if err != nil { + return err + } + // The account may or may not existent in post-state, try to + // load it and decode if it's found. + blob, err := ctx.accountTrie.TryGet(addrHash.Bytes()) + if err != nil { + return err + } + post := types.NewEmptyStateAccount() + if len(blob) != 0 { + if err := rlp.DecodeBytes(blob, &post); err != nil { + return err + } + } + // Apply all storage changes into the post-state storage trie. + st, err := loader.OpenStorageTrie(ctx.postRoot, addrHash, post.Root) + if err != nil { + return err + } + for key, val := range ctx.storages[addr] { + var err error + if len(val) == 0 { + err = st.TryDelete(key.Bytes()) + } else { + err = st.TryUpdate(key.Bytes(), val) + } + if err != nil { + return err + } + } + root, result, err := st.Commit(false) + if err != nil { + return err + } + if root != prev.Root { + return errors.New("failed to reset storage trie") + } + // The returned set can be nil if storage trie is not changed + // at all. + if result != nil { + if err := ctx.nodes.Merge(result); err != nil { + return err + } + } + // Write the prev-state account into the main trie + full, err := rlp.EncodeToBytes(prev) + if err != nil { + return err + } + return ctx.accountTrie.TryUpdate(addrHash.Bytes(), full) +} + +// deleteAccount the account was not present in prev-state, and is expected +// to be existent in post-state. Apply the reverse diff and verify if the +// account and storage is wiped out correctly. +func deleteAccount(ctx *context, loader TrieLoader, addr common.Address) error { + // The account must be existent in post-state, load the account. + h := newHasher() + defer h.release() + + addrHash := h.hash(addr.Bytes()) + blob, err := ctx.accountTrie.TryGet(addrHash.Bytes()) + if err != nil { + return err + } + if len(blob) == 0 { + return fmt.Errorf("account is non-existent %#x", addrHash) + } + var post types.StateAccount + if err := rlp.DecodeBytes(blob, &post); err != nil { + return err + } + st, err := loader.OpenStorageTrie(ctx.postRoot, addrHash, post.Root) + if err != nil { + return err + } + for key, val := range ctx.storages[addr] { + if len(val) != 0 { + return errors.New("expect storage deletion") + } + if err := st.TryDelete(key.Bytes()); err != nil { + return err + } + } + root, result, err := st.Commit(false) + if err != nil { + return err + } + if root != types.EmptyRootHash { + return errors.New("failed to clear storage trie") + } + // The returned set can be nil if storage trie is not changed + // at all. + if result != nil { + if err := ctx.nodes.Merge(result); err != nil { + return err + } + } + // Delete the post-state account from the main trie. + return ctx.accountTrie.TryDelete(addrHash.Bytes()) +} + +// hasher is used to compute the sha256 hash of the provided data. +type hasher struct{ sha crypto.KeccakState } + +var hasherPool = sync.Pool{ + New: func() interface{} { return &hasher{sha: sha3.NewLegacyKeccak256().(crypto.KeccakState)} }, +} + +func newHasher() *hasher { + return hasherPool.Get().(*hasher) +} + +func (h *hasher) hash(data []byte) common.Hash { + return crypto.HashData(h.sha, data) +} + +func (h *hasher) release() { + hasherPool.Put(h) +}