From 4d9489a765fc3d9bcbd96e7fece087bd8f6723d7 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 3 Dec 2022 15:59:05 +0100 Subject: [PATCH] feat: daemon: automatically set GOMEMLIMIT if it is unset I have a rather big collection of profiles where someone claims that Kubo is OOMing on XGiB. Then you open the profile and it is using half of that; this is due to the default GOGC=100. That means Go will only run the GC once the heap is twice as big as the previous live set. This situation happens more than it should / almost always because many parts of Kubo are memory garbage factories. Adding a GOMEMLIMIT helps by trading off more and more CPU running the GC more often when memory is about to run out. It's not healthy to run at the edge of the limit because the GC will run continuously, killing performance. So this doesn't double the effective memory usable by Kubo, but we should expect to be able to use ~1.5x~1.75x before performance drastically falls off. Closes: #8798 --- cmd/ipfs/daemon.go | 51 +++++++++++++++++++++++++++++++---- go.mod | 2 +- test/sharness/t0060-daemon.sh | 4 ++- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/cmd/ipfs/daemon.go b/cmd/ipfs/daemon.go index 1e03a8264a8e..73b982d718a5 100644 --- a/cmd/ipfs/daemon.go +++ b/cmd/ipfs/daemon.go @@ -4,17 +4,23 @@ import ( "errors" _ "expvar" "fmt" + "math" "net" "net/http" _ "net/http/pprof" "os" "runtime" + "runtime/debug" "sort" "sync" "time" multierror "github.com/hashicorp/go-multierror" + "github.com/dustin/go-humanize" + options "github.com/ipfs/boxo/coreiface/options" + cmds "github.com/ipfs/go-ipfs-cmds" + mprome "github.com/ipfs/go-metrics-prometheus" version "github.com/ipfs/kubo" utilmain "github.com/ipfs/kubo/cmd/ipfs/util" oldcmds "github.com/ipfs/kubo/commands" @@ -30,16 +36,13 @@ import ( fsrepo "github.com/ipfs/kubo/repo/fsrepo" "github.com/ipfs/kubo/repo/fsrepo/migrations" "github.com/ipfs/kubo/repo/fsrepo/migrations/ipfsfetcher" + goprocess "github.com/jbenet/goprocess" 
p2pcrypto "github.com/libp2p/go-libp2p/core/crypto" pnet "github.com/libp2p/go-libp2p/core/pnet" sockets "github.com/libp2p/go-socket-activation" - - options "github.com/ipfs/boxo/coreiface/options" - cmds "github.com/ipfs/go-ipfs-cmds" - mprome "github.com/ipfs/go-metrics-prometheus" - goprocess "github.com/jbenet/goprocess" ma "github.com/multiformats/go-multiaddr" manet "github.com/multiformats/go-multiaddr/net" + "github.com/pbnjay/memory" prometheus "github.com/prometheus/client_golang/prometheus" promauto "github.com/prometheus/client_golang/prometheus/promauto" ) @@ -197,6 +200,42 @@ func defaultMux(path string) corehttp.ServeOption { } } +// setMemoryLimit sets a soft memory limit to enforce running the GC more often when +// we are about to run out. +// This allows to recoup memory when it's about to run out and cancel the +// doubled memory footprint most go programs experience, at the cost of more CPU +// usage in memory tight conditions. This does not increase CPU usage when memory +// is plenty available, it will use more CPU and continue to run in cases where Kubo +// would OOM. +func setMemoryLimit() { + // From the STD documentation: + // A negative input does not adjust the limit, and allows for retrieval of the currently set memory limit. 
+ if currentMemoryLimit := debug.SetMemoryLimit(-1); currentMemoryLimit != math.MaxInt64 { + fmt.Printf("GOMEMLIMIT already set to %s, leaving as-is.\n", humanize.IBytes(uint64(currentMemoryLimit))) + // only update the memory limit if it wasn't set with GOMEMLIMIT already + return + } + + // this is a proportional negative-rate increase curve fitted to those points: + // 0GiB -> 0GiB + // 4GiB -> 0.5GiB + // 6GiB -> 0.75GiB + // 12GiB -> 1GiB + // 256GiB -> 2GiB + totalMemory := memory.TotalMemory() + memoryMargin := int64(213865e4 - 209281e4*math.Pow(math.E, -588918e-16*float64(totalMemory))) + // the fitted margin never drops below ~44MiB, so also bail out when it does not fit in totalMemory (this includes TotalMemory() == 0, i.e. detection failed): the unsigned subtraction below would otherwise wrap around + if memoryMargin <= 0 || totalMemory <= uint64(memoryMargin) { + // then don't bother setting a limit and rely on GOGC + fmt.Println("TotalMemory is too tight, continuing without GOMEMLIMIT.") + return + } + + remainingMemory := totalMemory - uint64(memoryMargin) + debug.SetMemoryLimit(int64(remainingMemory)) + fmt.Printf("Set GOMEMLIMIT to %s.\n", humanize.IBytes(remainingMemory)) +} + func daemonFunc(req *cmds.Request, re cmds.ResponseEmitter, env cmds.Environment) (_err error) { // Inject metrics before we do anything err := mprome.Inject() @@ -219,6 +258,8 @@ func daemonFunc(req *cmds.Request, re cmds.ResponseEmitter, env cmds.Environment // print the ipfs version printVersion() + setMemoryLimit() + managefd, _ := req.Options[adjustFDLimitKwd].(bool) if managefd { if _, _, err := utilmain.ManageFdLimit(); err != nil { diff --git a/go.mod b/go.mod index 159bbd3e4fd3..82d239cbaa87 100644 --- a/go.mod +++ b/go.mod @@ -234,4 +234,4 @@ require ( nhooyr.io/websocket v1.8.7 // indirect ) -go 1.18 +go 1.19 diff --git a/test/sharness/t0060-daemon.sh b/test/sharness/t0060-daemon.sh index f43708b1df53..d005779dbfd7 100755 --- a/test/sharness/t0060-daemon.sh +++ b/test/sharness/t0060-daemon.sh @@ -93,7 +93,9 @@ test_expect_success "ipfs daemon output looks good" ' echo "WebUI: http://'$API_ADDR'/webui" >>expected_daemon && echo "Gateway server listening 
on '$GWAY_MADDR'" >>expected_daemon && echo "Daemon is ready" >>expected_daemon && - test_cmp expected_daemon actual_daemon + grep -q "^Set GOMEMLIMIT to" actual_daemon && + grep -v "^Set GOMEMLIMIT to" actual_daemon > actual_daemon_filtered && + test_cmp expected_daemon actual_daemon_filtered ' test_expect_success ".ipfs/ has been created" '