From dac1fdd8c03699330407d01ebb26bcdb675fff0f Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Mon, 21 Nov 2022 11:40:46 +1300 Subject: [PATCH 1/6] swarm: add very basic metrics for opening and closing connections --- p2p/net/swarm/swarm_conn.go | 3 + p2p/net/swarm/swarm_dial.go | 4 + p2p/net/swarm/swarm_listen.go | 1 + p2p/net/swarm/swarm_metrics.go | 152 +++++++++++++++++++++++++++++++++ p2p/net/upgrader/upgrader.go | 6 +- 5 files changed, 163 insertions(+), 3 deletions(-) create mode 100644 p2p/net/swarm/swarm_metrics.go diff --git a/p2p/net/swarm/swarm_conn.go b/p2p/net/swarm/swarm_conn.go index 4de2727f80..f85cd907cb 100644 --- a/p2p/net/swarm/swarm_conn.go +++ b/p2p/net/swarm/swarm_conn.go @@ -60,6 +60,9 @@ func (c *Conn) Close() error { } func (c *Conn) doClose() { + recordConnectionClosed(c.stat.Direction, c.ConnState()) + recordConnectionDuration(c.stat.Direction, time.Since(c.stat.Stats.Opened), c.ConnState()) + c.swarm.removeConn(c) // Prevent new streams from opening. diff --git a/p2p/net/swarm/swarm_dial.go b/p2p/net/swarm/swarm_dial.go index 29703f7747..09e296ae4b 100644 --- a/p2p/net/swarm/swarm_dial.go +++ b/p2p/net/swarm/swarm_dial.go @@ -490,11 +490,15 @@ func (s *Swarm) dialAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr) (tra return nil, ErrNoTransport } + start := time.Now() connC, err := tpt.Dial(ctx, addr, p) if err != nil { + recordDialFailed(addr, err) return nil, err } canonicallog.LogPeerStatus(100, connC.RemotePeer(), connC.RemoteMultiaddr(), "connection_status", "established", "dir", "outbound") + recordConnectionOpened(network.DirOutbound, connC.RemotePublicKey(), connC.ConnState()) + recordHandshakeLatency(time.Since(start), connC.ConnState()) // Trust the transport? Yeah... right. if connC.RemotePeer() != p { diff --git a/p2p/net/swarm/swarm_listen.go b/p2p/net/swarm/swarm_listen.go index 9c5394d438..0c905075ee 100644 --- a/p2p/net/swarm/swarm_listen.go +++ b/p2p/net/swarm/swarm_listen.go @@ -130,6 +130,7 @@ func (s *Swarm) AddListenAddr(a ma.Multiaddr) error { return } canonicallog.LogPeerStatus(100, c.RemotePeer(), c.RemoteMultiaddr(), "connection_status", "established", "dir", "inbound") + recordConnectionOpened(network.DirInbound, c.RemotePublicKey(), c.ConnState()) log.Debugf("swarm listener accepted connection: %s <-> %s", c.LocalMultiaddr(), c.RemoteMultiaddr()) s.refs.Add(1) diff --git a/p2p/net/swarm/swarm_metrics.go b/p2p/net/swarm/swarm_metrics.go new file mode 100644 index 0000000000..1193bb26d3 --- /dev/null +++ b/p2p/net/swarm/swarm_metrics.go @@ -0,0 +1,152 @@ +package swarm + +import ( + "context" + "errors" + "fmt" + "net" + "strings" + "time" + + "github.com/libp2p/go-libp2p/core/crypto" + "github.com/libp2p/go-libp2p/core/network" + + ma "github.com/multiformats/go-multiaddr" + + "github.com/prometheus/client_golang/prometheus" +) + +const metricNamespace = "libp2p_swarm_" + +var ( + connsOpened = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: metricNamespace + "connections_opened_total", + Help: "Connections Opened", + }, + []string{"dir", "transport", "security", "muxer"}, + ) + keyTypes = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: metricNamespace + "key_types_total", + Help: "key type", + }, + []string{"dir", "key_type"}, + ) + connsClosed = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: metricNamespace + "connections_closed_total", + Help: "Connections Closed", + }, + []string{"dir", "transport", "security", "muxer"}, + ) + dialError = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: metricNamespace + "dial_errors_total", + Help: "Dial Error", + }, + []string{"error"}, + ) + connDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: metricNamespace + "connection_duration_seconds", + Help: "Duration of a Connection", + Buckets: prometheus.ExponentialBuckets(1.0/16, 2, 25), // up to 24 days + }, + []string{"dir", "transport", "security", "muxer"}, + ) + connHandshakeLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: metricNamespace + "handshake_latency_seconds", + Help: "Duration of the libp2p Handshake", + Buckets: prometheus.ExponentialBuckets(0.001, 1.3, 35), + }, + []string{"transport", "security", "muxer"}, + ) +) + +func init() { + prometheus.MustRegister(connsOpened, keyTypes, connsClosed, dialError, connDuration, connHandshakeLatency) +} + +var transports = [...]int{ma.P_CIRCUIT, ma.P_WEBRTC, ma.P_WEBTRANSPORT, ma.P_QUIC, ma.P_QUIC_V1, ma.P_WSS, ma.P_WS, ma.P_TCP} + +func getDirection(dir network.Direction) string { + switch dir { + case network.DirOutbound: + return "outbound" + case network.DirInbound: + return "inbound" + default: + return "unknown" + } +} + +func appendConnectionState(tags []string, cs network.ConnectionState) []string { + if cs.Transport == "" { + // This shouldn't happen, unless the transport doesn't properly set the Transport field in the ConnectionState. + tags = append(tags, "unknown") + } else { + tags = append(tags, cs.Transport) + } + // These might be empty, depending on the transport. + // For example, QUIC doesn't set security nor muxer. + tags = append(tags, cs.Security) + tags = append(tags, cs.StreamMultiplexer) + return tags +} + +func recordConnectionOpened(dir network.Direction, p crypto.PubKey, cs network.ConnectionState) { + tags := make([]string, 0, 4) + tags = append(tags, getDirection(dir)) + tags = appendConnectionState(tags, cs) + connsOpened.WithLabelValues(tags...).Inc() + keyTypes.WithLabelValues(getDirection(dir), p.Type().String()).Inc() +} + +func recordConnectionClosed(dir network.Direction, cs network.ConnectionState) { + tags := make([]string, 0, 4) + tags = append(tags, getDirection(dir)) + tags = appendConnectionState(tags, cs) + connsClosed.WithLabelValues(tags...).Inc() +} + +func recordConnectionDuration(dir network.Direction, t time.Duration, cs network.ConnectionState) { + tags := make([]string, 0, 4) + tags = append(tags, getDirection(dir)) + tags = appendConnectionState(tags, cs) + connDuration.WithLabelValues(tags...).Observe(t.Seconds()) +} + +func recordHandshakeLatency(t time.Duration, cs network.ConnectionState) { + tags := make([]string, 0, 3) + tags = appendConnectionState(tags, cs) + connHandshakeLatency.WithLabelValues(tags...).Observe(t.Seconds()) +} + +func recordDialFailed(addr ma.Multiaddr, err error) { + var transport string + for _, p := range transports { + if _, err := addr.ValueForProtocol(p); err == nil { + transport = ma.ProtocolWithCode(p).Name + break + } + } + e := "other" + if errors.Is(err, context.Canceled) { + e = "canceled" + } else if errors.Is(err, context.DeadlineExceeded) { + e = "deadline" + } else { + nerr, ok := err.(net.Error) + if ok && nerr.Timeout() { + e = "timeout" + } else if strings.Contains(err.Error(), "connect: connection refused") { + e = "connection refused" + } + } + if e == "other" { + fmt.Printf("transport: %s, category: %s (orig: %s)\n", transport, e, err) + } + dialError.WithLabelValues(e).Inc() +} diff --git a/p2p/net/upgrader/upgrader.go b/p2p/net/upgrader/upgrader.go index 5a69efb0bd..38c6faea4c 100644 --- a/p2p/net/upgrader/upgrader.go +++ b/p2p/net/upgrader/upgrader.go @@ -144,7 +144,7 @@ func (u *upgrader) upgrade(ctx context.Context, t transport.Transport, maconn ma pconn, err := pnet.NewProtectedConn(u.psk, conn) if err != nil { conn.Close() - return nil, fmt.Errorf("failed to setup private network protector: %s", err) + return nil, fmt.Errorf("failed to setup private network protector: %w", err) } conn = pconn } else if ipnet.ForcePrivateNetwork { @@ -155,7 +155,7 @@ func (u *upgrader) upgrade(ctx context.Context, t transport.Transport, maconn ma sconn, security, server, err := u.setupSecurity(ctx, conn, p, dir) if err != nil { conn.Close() - return nil, fmt.Errorf("failed to negotiate security protocol: %s", err) + return nil, fmt.Errorf("failed to negotiate security protocol: %w", err) } // call the connection gater, if one is registered. @@ -182,7 +182,7 @@ func (u *upgrader) upgrade(ctx context.Context, t transport.Transport, maconn ma muxer, smconn, err := u.setupMuxer(ctx, sconn, server, connScope.PeerScope()) if err != nil { sconn.Close() - return nil, fmt.Errorf("failed to negotiate stream multiplexer: %s", err) + return nil, fmt.Errorf("failed to negotiate stream multiplexer: %w", err) } tc := &transportConn{ From 18b94d4e9bfb7f48b6fb2a39a4a37c235fbea0e2 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Mon, 2 Jan 2023 14:38:54 +1300 Subject: [PATCH 2/6] swarm: use a sync.Pool to make metrics collection allocation-free --- p2p/net/swarm/swarm_metrics.go | 55 ++++++++++++++++++++--------- p2p/net/swarm/swarm_metrics_test.go | 31 ++++++++++++++++ 2 files changed, 70 insertions(+), 16 deletions(-) create mode 100644 p2p/net/swarm/swarm_metrics_test.go diff --git a/p2p/net/swarm/swarm_metrics.go b/p2p/net/swarm/swarm_metrics.go index 1193bb26d3..c65622f815 100644 --- a/p2p/net/swarm/swarm_metrics.go +++ b/p2p/net/swarm/swarm_metrics.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "strings" + "sync" "time" "github.com/libp2p/go-libp2p/core/crypto" @@ -69,6 +70,19 @@ func init() { prometheus.MustRegister(connsOpened, keyTypes, connsClosed, dialError, connDuration, connHandshakeLatency) } +var stringPool = sync.Pool{New: func() any { + s := make([]string, 0, 8) + return &s +}} + +func getStringSlice() *[]string { + s := stringPool.Get().(*[]string) + *s = (*s)[:0] + return s +} + +func putStringSlice(s *[]string) { stringPool.Put(s) } + var transports = [...]int{ma.P_CIRCUIT, ma.P_WEBRTC, ma.P_WEBTRANSPORT, ma.P_QUIC, ma.P_QUIC_V1, ma.P_WSS, ma.P_WS, ma.P_TCP} func getDirection(dir network.Direction) string { @@ -97,31 +111,40 @@ func appendConnectionState(tags []string, cs network.ConnectionState) []string { } func recordConnectionOpened(dir network.Direction, p crypto.PubKey, cs network.ConnectionState) { - tags := make([]string, 0, 4) - tags = append(tags, getDirection(dir)) - tags = appendConnectionState(tags, cs) - connsOpened.WithLabelValues(tags...).Inc() - keyTypes.WithLabelValues(getDirection(dir), p.Type().String()).Inc() + tags := getStringSlice() + defer putStringSlice(tags) + + *tags = append(*tags, getDirection(dir)) + *tags = appendConnectionState(*tags, cs) + connsOpened.WithLabelValues(*tags...).Inc() + + *tags = (*tags)[:0] + *tags = append(*tags, getDirection(dir)) + *tags = append(*tags, p.Type().String()) + keyTypes.WithLabelValues(*tags...).Inc() } func recordConnectionClosed(dir network.Direction, cs network.ConnectionState) { - tags := make([]string, 0, 4) - tags = append(tags, getDirection(dir)) - tags = appendConnectionState(tags, cs) - connsClosed.WithLabelValues(tags...).Inc() + tags := getStringSlice() + defer putStringSlice(tags) + *tags = append(*tags, getDirection(dir)) + *tags = appendConnectionState(*tags, cs) + connsClosed.WithLabelValues(*tags...).Inc() } func recordConnectionDuration(dir network.Direction, t time.Duration, cs network.ConnectionState) { - tags := make([]string, 0, 4) - tags = append(tags, getDirection(dir)) - tags = appendConnectionState(tags, cs) - connDuration.WithLabelValues(tags...).Observe(t.Seconds()) + tags := getStringSlice() + defer putStringSlice(tags) + *tags = append(*tags, getDirection(dir)) + *tags = appendConnectionState(*tags, cs) + connDuration.WithLabelValues(*tags...).Observe(t.Seconds()) } func recordHandshakeLatency(t time.Duration, cs network.ConnectionState) { - tags := make([]string, 0, 3) - tags = appendConnectionState(tags, cs) - connHandshakeLatency.WithLabelValues(tags...).Observe(t.Seconds()) + tags := getStringSlice() + defer putStringSlice(tags) + *tags = appendConnectionState(*tags, cs) + connHandshakeLatency.WithLabelValues(*tags...).Observe(t.Seconds()) } func recordDialFailed(addr ma.Multiaddr, err error) { diff --git a/p2p/net/swarm/swarm_metrics_test.go b/p2p/net/swarm/swarm_metrics_test.go new file mode 100644 index 0000000000..df1c0504f0 --- /dev/null +++ b/p2p/net/swarm/swarm_metrics_test.go @@ -0,0 +1,31 @@ +package swarm + +import ( + "crypto/rand" + "testing" + + "github.com/libp2p/go-libp2p/core/crypto" + "github.com/libp2p/go-libp2p/core/network" + + "github.com/stretchr/testify/require" +) + +func BenchmarkMetricsConnOpen(b *testing.B) { + b.ReportAllocs() + quicConnState := network.ConnectionState{Transport: "quic"} + tcpConnState := network.ConnectionState{ + StreamMultiplexer: "yamux", + Security: "tls", + Transport: "tcp", + } + _, pub, err := crypto.GenerateEd25519Key(rand.Reader) + require.NoError(b, err) + for i := 0; i < b.N; i++ { + switch i % 2 { + case 0: + recordConnectionOpened(network.DirInbound, pub, quicConnState) + case 1: + recordConnectionOpened(network.DirInbound, pub, tcpConnState) + } + } +} From a000184ad3752ab37f0c2268ad995601c6d39bba Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Mon, 2 Jan 2023 15:35:24 +1300 Subject: [PATCH 3/6] swarm: introduce a MetricsTracer interface --- config/config.go | 11 ++++--- p2p/net/swarm/swarm.go | 10 ++++++- p2p/net/swarm/swarm_conn.go | 5 ++-- p2p/net/swarm/swarm_dial.go | 11 +++++-- p2p/net/swarm/swarm_listen.go | 4 ++- p2p/net/swarm/swarm_metrics.go | 46 ++++++++++++++++------------- p2p/net/swarm/swarm_metrics_test.go | 5 ++-- 7 files changed, 58 insertions(+), 34 deletions(-) diff --git a/config/config.go b/config/config.go index 94e18be27f..874bcb5a5a 100644 --- a/config/config.go +++ b/config/config.go @@ -119,7 +119,7 @@ type Config struct { HolePunchingOptions []holepunch.Option } -func (cfg *Config) makeSwarm() (*swarm.Swarm, error) { +func (cfg *Config) makeSwarm(enableMetrics bool) (*swarm.Swarm, error) { if cfg.Peerstore == nil { return nil, fmt.Errorf("no peerstore specified") } @@ -151,7 +151,7 @@ func (cfg *Config) makeSwarm() (*swarm.Swarm, error) { return nil, err } - opts := make([]swarm.Option, 0, 3) + opts := make([]swarm.Option, 0, 6) if cfg.Reporter != nil { opts = append(opts, swarm.WithMetrics(cfg.Reporter)) } @@ -167,6 +167,9 @@ func (cfg *Config) makeSwarm() (*swarm.Swarm, error) { if cfg.MultiaddrResolver != nil { opts = append(opts, swarm.WithMultiaddrResolver(cfg.MultiaddrResolver)) } + if enableMetrics { + opts = append(opts, swarm.WithMetricsTracer(swarm.NewMetricsTracer())) + } // TODO: Make the swarm implementation configurable. return swarm.NewSwarm(pid, cfg.Peerstore, opts...) } @@ -276,7 +279,7 @@ func (cfg *Config) addTransports(h host.Host) error { // // This function consumes the config. Do not reuse it (really!). func (cfg *Config) NewNode() (host.Host, error) { - swrm, err := cfg.makeSwarm() + swrm, err := cfg.makeSwarm(true) if err != nil { return nil, err } @@ -382,7 +385,7 @@ func (cfg *Config) NewNode() (host.Host, error) { Peerstore: ps, } - dialer, err := autoNatCfg.makeSwarm() + dialer, err := autoNatCfg.makeSwarm(false) if err != nil { h.Close() return nil, err diff --git a/p2p/net/swarm/swarm.go b/p2p/net/swarm/swarm.go index 7606b80c82..18d5183f52 100644 --- a/p2p/net/swarm/swarm.go +++ b/p2p/net/swarm/swarm.go @@ -71,6 +71,13 @@ func WithMetrics(reporter metrics.Reporter) Option { } } +func WithMetricsTracer(t MetricsTracer) Option { + return func(s *Swarm) error { + s.metricsTracer = t + return nil + } +} + func WithDialTimeout(t time.Duration) Option { return func(s *Swarm) error { s.dialTimeout = t @@ -151,7 +158,8 @@ type Swarm struct { ctx context.Context // is canceled when Close is called ctxCancel context.CancelFunc - bwc metrics.Reporter + bwc metrics.Reporter + metricsTracer MetricsTracer } // NewSwarm constructs a Swarm. diff --git a/p2p/net/swarm/swarm_conn.go b/p2p/net/swarm/swarm_conn.go index f85cd907cb..c24ddee310 100644 --- a/p2p/net/swarm/swarm_conn.go +++ b/p2p/net/swarm/swarm_conn.go @@ -60,8 +60,9 @@ func (c *Conn) Close() error { } func (c *Conn) doClose() { - recordConnectionClosed(c.stat.Direction, c.ConnState()) - recordConnectionDuration(c.stat.Direction, time.Since(c.stat.Stats.Opened), c.ConnState()) + if c.swarm.metricsTracer != nil { + c.swarm.metricsTracer.ClosedConnection(c.stat.Direction, time.Since(c.stat.Stats.Opened), c.ConnState()) + } c.swarm.removeConn(c) diff --git a/p2p/net/swarm/swarm_dial.go b/p2p/net/swarm/swarm_dial.go index 09e296ae4b..01b955e865 100644 --- a/p2p/net/swarm/swarm_dial.go +++ b/p2p/net/swarm/swarm_dial.go @@ -493,12 +493,17 @@ func (s *Swarm) dialAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr) (tra start := time.Now() connC, err := tpt.Dial(ctx, addr, p) if err != nil { - recordDialFailed(addr, err) + if s.metricsTracer != nil { + s.metricsTracer.FailedDialing(addr, err) + } return nil, err } canonicallog.LogPeerStatus(100, connC.RemotePeer(), connC.RemoteMultiaddr(), "connection_status", "established", "dir", "outbound") - recordConnectionOpened(network.DirOutbound, connC.RemotePublicKey(), connC.ConnState()) - recordHandshakeLatency(time.Since(start), connC.ConnState()) + if s.metricsTracer != nil { + connState := connC.ConnState() + s.metricsTracer.OpenedConnection(network.DirOutbound, connC.RemotePublicKey(), connState) + s.metricsTracer.CompletedHandshake(time.Since(start), connState) + } // Trust the transport? Yeah... right. if connC.RemotePeer() != p { diff --git a/p2p/net/swarm/swarm_listen.go b/p2p/net/swarm/swarm_listen.go index 0c905075ee..334abb4ea3 100644 --- a/p2p/net/swarm/swarm_listen.go +++ b/p2p/net/swarm/swarm_listen.go @@ -130,7 +130,9 @@ func (s *Swarm) AddListenAddr(a ma.Multiaddr) error { return } canonicallog.LogPeerStatus(100, c.RemotePeer(), c.RemoteMultiaddr(), "connection_status", "established", "dir", "inbound") - recordConnectionOpened(network.DirInbound, c.RemotePublicKey(), c.ConnState()) + if s.metricsTracer != nil { + s.metricsTracer.OpenedConnection(network.DirInbound, c.RemotePublicKey(), c.ConnState()) + } log.Debugf("swarm listener accepted connection: %s <-> %s", c.LocalMultiaddr(), c.RemoteMultiaddr()) s.refs.Add(1) diff --git a/p2p/net/swarm/swarm_metrics.go b/p2p/net/swarm/swarm_metrics.go index c65622f815..292ad589e3 100644 --- a/p2p/net/swarm/swarm_metrics.go +++ b/p2p/net/swarm/swarm_metrics.go @@ -3,7 +3,6 @@ package swarm import ( "context" "errors" - "fmt" "net" "strings" "sync" @@ -66,10 +65,28 @@ var ( ) ) -func init() { +var initMetricsOnce sync.Once + +func initMetrics() { prometheus.MustRegister(connsOpened, keyTypes, connsClosed, dialError, connDuration, connHandshakeLatency) } +type MetricsTracer interface { + OpenedConnection(network.Direction, crypto.PubKey, network.ConnectionState) + ClosedConnection(network.Direction, time.Duration, network.ConnectionState) + CompletedHandshake(time.Duration, network.ConnectionState) + FailedDialing(ma.Multiaddr, error) +} + +type metricsTracer struct{} + +var _ MetricsTracer = &metricsTracer{} + +func NewMetricsTracer() *metricsTracer { + initMetricsOnce.Do(initMetrics) + return &metricsTracer{} +} + var stringPool = sync.Pool{New: func() any { s := make([]string, 0, 8) return &s @@ -110,7 +127,7 @@ func appendConnectionState(tags []string, cs network.ConnectionState) []string { return tags } -func recordConnectionOpened(dir network.Direction, p crypto.PubKey, cs network.ConnectionState) { +func (m *metricsTracer) OpenedConnection(dir network.Direction, p crypto.PubKey, cs network.ConnectionState) { tags := getStringSlice() defer putStringSlice(tags) @@ -124,37 +141,27 @@ func recordConnectionOpened(dir network.Direction, p crypto.PubKey, cs network.C keyTypes.WithLabelValues(*tags...).Inc() } -func recordConnectionClosed(dir network.Direction, cs network.ConnectionState) { +func (m *metricsTracer) ClosedConnection(dir network.Direction, duration time.Duration, cs network.ConnectionState) { tags := getStringSlice() defer putStringSlice(tags) *tags = append(*tags, getDirection(dir)) *tags = appendConnectionState(*tags, cs) connsClosed.WithLabelValues(*tags...).Inc() -} -func recordConnectionDuration(dir network.Direction, t time.Duration, cs network.ConnectionState) { - tags := getStringSlice() - defer putStringSlice(tags) + *tags = (*tags)[:0] *tags = append(*tags, getDirection(dir)) *tags = appendConnectionState(*tags, cs) - connDuration.WithLabelValues(*tags...).Observe(t.Seconds()) + connDuration.WithLabelValues(*tags...).Observe(duration.Seconds()) } -func recordHandshakeLatency(t time.Duration, cs network.ConnectionState) { +func (m *metricsTracer) CompletedHandshake(t time.Duration, cs network.ConnectionState) { tags := getStringSlice() defer putStringSlice(tags) *tags = appendConnectionState(*tags, cs) connHandshakeLatency.WithLabelValues(*tags...).Observe(t.Seconds()) } -func recordDialFailed(addr ma.Multiaddr, err error) { - var transport string - for _, p := range transports { - if _, err := addr.ValueForProtocol(p); err == nil { - transport = ma.ProtocolWithCode(p).Name - break - } - } +func (m *metricsTracer) FailedDialing(_ ma.Multiaddr, err error) { e := "other" if errors.Is(err, context.Canceled) { e = "canceled" @@ -168,8 +175,5 @@ func recordDialFailed(addr ma.Multiaddr, err error) { e = "connection refused" } } - if e == "other" { - fmt.Printf("transport: %s, category: %s (orig: %s)\n", transport, e, err) - } dialError.WithLabelValues(e).Inc() } diff --git a/p2p/net/swarm/swarm_metrics_test.go b/p2p/net/swarm/swarm_metrics_test.go index df1c0504f0..9ccc9d10d2 100644 --- a/p2p/net/swarm/swarm_metrics_test.go +++ b/p2p/net/swarm/swarm_metrics_test.go @@ -20,12 +20,13 @@ func BenchmarkMetricsConnOpen(b *testing.B) { } _, pub, err := crypto.GenerateEd25519Key(rand.Reader) require.NoError(b, err) + tr := NewMetricsTracer() for i := 0; i < b.N; i++ { switch i % 2 { case 0: - recordConnectionOpened(network.DirInbound, pub, quicConnState) + tr.OpenedConnection(network.DirInbound, pub, quicConnState) case 1: - recordConnectionOpened(network.DirInbound, pub, tcpConnState) + tr.OpenedConnection(network.DirInbound, pub, tcpConnState) } } } From 0b412dd6df8a9cd500aa627bd041104877518687 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Sat, 7 Jan 2023 18:57:07 +1300 Subject: [PATCH 4/6] swarm: add the transport to the dial error metric --- p2p/net/swarm/swarm_metrics.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/p2p/net/swarm/swarm_metrics.go b/p2p/net/swarm/swarm_metrics.go index 292ad589e3..dedacc7d16 100644 --- a/p2p/net/swarm/swarm_metrics.go +++ b/p2p/net/swarm/swarm_metrics.go @@ -45,7 +45,7 @@ var ( Name: metricNamespace + "dial_errors_total", Help: "Dial Error", }, - []string{"error"}, + []string{"transport", "error"}, ) connDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ @@ -100,8 +100,6 @@ func getStringSlice() *[]string { func putStringSlice(s *[]string) { stringPool.Put(s) } -var transports = [...]int{ma.P_CIRCUIT, ma.P_WEBRTC, ma.P_WEBTRANSPORT, ma.P_QUIC, ma.P_QUIC_V1, ma.P_WSS, ma.P_WS, ma.P_TCP} - func getDirection(dir network.Direction) string { switch dir { case network.DirOutbound: @@ -161,7 +159,15 @@ func (m *metricsTracer) CompletedHandshake(t time.Duration, cs network.Connectio connHandshakeLatency.WithLabelValues(*tags...).Observe(t.Seconds()) } -func (m *metricsTracer) FailedDialing(_ ma.Multiaddr, err error) { +var transports = [...]int{ma.P_CIRCUIT, ma.P_WEBRTC, ma.P_WEBTRANSPORT, ma.P_QUIC, ma.P_QUIC_V1, ma.P_WSS, ma.P_WS, ma.P_TCP} + +func (m *metricsTracer) FailedDialing(addr ma.Multiaddr, err error) { + var transport string + for _, t := range transports { + if _, err := addr.ValueForProtocol(t); err == nil { + transport = ma.ProtocolWithCode(t).Name + } + } e := "other" if errors.Is(err, context.Canceled) { e = "canceled" @@ -175,5 +181,5 @@ func (m *metricsTracer) FailedDialing(_ ma.Multiaddr, err error) { e = "connection refused" } } - dialError.WithLabelValues(e).Inc() + dialError.WithLabelValues(transport, e).Inc() } From 844f8e61d7534a9c44403d3eeadbe7528b6a7f7c Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Sun, 22 Jan 2023 18:50:37 +1300 Subject: [PATCH 5/6] swarm: add Grafana dashboard --- p2p/net/swarm/grafana-dashboards/swarm.json | 2263 +++++++++++++++++++ 1 file changed, 2263 insertions(+) create mode 100644 p2p/net/swarm/grafana-dashboards/swarm.json diff --git a/p2p/net/swarm/grafana-dashboards/swarm.json b/p2p/net/swarm/grafana-dashboards/swarm.json new file mode 100644 index 0000000000..be1cbee90e --- /dev/null +++ b/p2p/net/swarm/grafana-dashboards/swarm.json @@ -0,0 +1,2263 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "enable": true, + "iconColor": "red", + "name": "New annotation" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 6, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 21, + "panels": [], + "title": "Currently Established Connections", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 50 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quic" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quic-v1" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "webtransport" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "websocket" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p2p-circuit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 23, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.3.2-67a213dc85", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "sum by (transport) (libp2p_swarm_connections_opened_total) - sum by (transport) (libp2p_swarm_connections_closed_total)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active Connections", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quic " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quic-v1 " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "webtransport " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "websocket /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p2p-circuit " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "libp2p_swarm_connections_opened_total{dir=\"inbound\"} - libp2p_swarm_connections_closed_total{dir=\"inbound\"}", + "legendFormat": "{{transport}} {{security}} {{muxer}}", + "range": true, + "refId": "A" + } + ], + "title": "Active Connections: Inbound", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quic " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quic-v1 " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "webtransport " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "websocket /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p2p-circuit " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "libp2p_swarm_connections_opened_total{dir=\"outbound\"} - libp2p_swarm_connections_closed_total{dir=\"outbound\"}", + "legendFormat": "{{transport}} {{security}} {{muxer}}", + "range": true, + "refId": "A" + } + ], + "title": "Active Connections: Outgoing", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by(le) (rate(libp2p_swarm_handshake_latency_seconds_bucket{transport=~\"quic|quic-v1\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "50th percentile", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum by(le) (rate(libp2p_swarm_handshake_latency_seconds_bucket{transport=~\"quic|quic-v1\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "90th percentile", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(libp2p_swarm_handshake_latency_seconds_bucket{transport=~\"quic|quic-v1\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "95th percentile", + "range": true, + "refId": "C" + } + ], + "title": "Handshake Latency (QUIC, QUIC v1)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by(le) (rate(libp2p_swarm_handshake_latency_seconds_bucket{transport=\"tcp\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "50th percentile", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum by(le) (rate(libp2p_swarm_handshake_latency_seconds_bucket{transport=\"tcp\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "90th percentile", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(libp2p_swarm_handshake_latency_seconds_bucket{transport=\"tcp\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "95th percentile", + "range": true, + "refId": "C" + } + ], + "title": "Handshake Latency (TCP)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile(0.5, sum(rate(libp2p_swarm_connection_duration_seconds_bucket[$__rate_interval])) by (le))", + "instant": false, + "legendFormat": "50th percentile", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum(rate(libp2p_swarm_connection_duration_seconds_bucket[$__rate_interval])) by (le))", + "hide": false, + "legendFormat": "90th percentile", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(libp2p_swarm_connection_duration_seconds_bucket[$__rate_interval])) by (le))", + "hide": false, + "legendFormat": "95th percentile", + "range": true, + "refId": "C" + } + ], + "title": "Connection Duration", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 19, + "panels": [], + "title": "Connection Establishment", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quic " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quic-v1 " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "webtransport " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "websocket /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p2p-circuit " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "rate(libp2p_swarm_connections_opened_total{dir=\"inbound\"}[$__rate_interval])", + "legendFormat": "{{transport}} {{security}} {{muxer}}", + "range": true, + "refId": "A" + } + ], + "title": "New Connections: Inbound", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quic " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quic-v1 " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "webtransport " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "websocket /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p2p-circuit " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "rate(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__rate_interval])", + "legendFormat": "{{transport}} {{security}} {{muxer}}", + "range": true, + "refId": "A" + } + ], + "title": "New Connections: Outgoing", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quic " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quic-v1 " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "webtransport " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "websocket /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p2p-circuit " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 4, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "increase(libp2p_swarm_connections_opened_total{dir=\"inbound\"}[$__range])", + "legendFormat": "{{transport}} {{security}} {{muxer}}", + "range": true, + "refId": "A" + } + ], + "title": "New Inbound Connections: Transports / Security / Muxers", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quic " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quic-v1 " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "webtransport " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /noise /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "websocket /noise /yamux/1.0.0" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p2p-circuit " + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp /tls/1.0.0 /mplex/6.7.0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 6, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "increase(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__range])", + "legendFormat": "{{transport}} {{security}} {{muxer}}", + "range": true, + "refId": "A" + } + ], + "title": "New Outgoing Connections: Transports / Security / Muxers", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 51 + }, + "id": 15, + "options": { + "legend": { + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.3.2-45365", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "sum(increase(libp2p_swarm_dial_errors_total[$__range])) by (error)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Dial Errors", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "quic" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "quic-v1" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "webtransport" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tcp" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "websocket" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p2p-circuit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.3.2-67a213dc85", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "sum by (transport) (rate(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__rate_interval])) ", + "hide": true, + "legendFormat": "{{transport}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "sum by(transport, error) (rate(libp2p_swarm_dial_errors_total[$__rate_interval]))", + "hide": true, + "legendFormat": "dial error ({{error}}, {{transport}})", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "sum(rate(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__rate_interval])) by (transport) / (sum(rate(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__rate_interval])) by (transport) + (sum(rate(libp2p_swarm_dial_errors_total[$__rate_interval])) by (transport)))", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "C" + } + ], + "title": "Dial Success Rates", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "description": "on newly established connections", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 59 + }, + "id": 25, + "options": { + "legend": { + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "editorMode": "code", + "expr": "sum by (key_type) (increase(libp2p_swarm_key_types_total[$__range]))", + "legendFormat": "{{key_type}}", + "range": true, + "refId": "A" + } + ], + "title": "libp2p key types", + "type": "piechart" + } + ], + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "utGyV1i7z" + }, + "definition": "label_values(up, instance)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(up, instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/(.*):[0-9].*/", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "libp2p Swarm", + "uid": "a15PyhO4z", + "version": 68, + "weekStart": "" +} From efb535fff97f4b4f142300d33184ff785944cc50 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Fri, 27 Jan 2023 09:20:53 +1300 Subject: [PATCH 6/6] swarm: use the prometheus namespace option --- p2p/net/swarm/swarm_metrics.go | 36 ++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/p2p/net/swarm/swarm_metrics.go b/p2p/net/swarm/swarm_metrics.go index dedacc7d16..fec82c17a1 100644 --- a/p2p/net/swarm/swarm_metrics.go +++ b/p2p/net/swarm/swarm_metrics.go @@ -16,50 +16,56 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -const metricNamespace = "libp2p_swarm_" +const metricNamespace = "libp2p_swarm" var ( connsOpened = prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: metricNamespace + "connections_opened_total", - Help: "Connections Opened", + Namespace: metricNamespace, + Name: "connections_opened_total", + Help: "Connections Opened", }, []string{"dir", "transport", "security", "muxer"}, ) keyTypes = prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: metricNamespace + "key_types_total", - Help: "key type", + Namespace: metricNamespace, + Name: "key_types_total", + Help: "key type", }, []string{"dir", "key_type"}, ) connsClosed = prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: metricNamespace + "connections_closed_total", - Help: "Connections Closed", + Namespace: metricNamespace, + Name: "connections_closed_total", + Help: "Connections Closed", }, []string{"dir", "transport", "security", "muxer"}, ) dialError = prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: metricNamespace + "dial_errors_total", - Help: "Dial Error", + Namespace: metricNamespace, + Name: "dial_errors_total", + Help: "Dial Error", }, []string{"transport", "error"}, ) connDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: metricNamespace + "connection_duration_seconds", - Help: "Duration of a Connection", - Buckets: prometheus.ExponentialBuckets(1.0/16, 2, 25), // up to 24 days + Namespace: metricNamespace, + Name: "connection_duration_seconds", + Help: "Duration of a Connection", + Buckets: prometheus.ExponentialBuckets(1.0/16, 2, 25), // up to 24 days }, []string{"dir", "transport", "security", "muxer"}, ) connHandshakeLatency = prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: metricNamespace + "handshake_latency_seconds", - Help: "Duration of the libp2p Handshake", - Buckets: prometheus.ExponentialBuckets(0.001, 1.3, 35), + Namespace: metricNamespace, + Name: "handshake_latency_seconds", + Help: "Duration of the libp2p Handshake", + Buckets: prometheus.ExponentialBuckets(0.001, 1.3, 35), }, []string{"transport", "security", "muxer"}, )