Skip to content

Commit

Permalink
add gNMIc prometheus metric gnmic_target_up
Browse files Browse the repository at this point in the history
  • Loading branch information
karimra committed Nov 4, 2024
1 parent cc35c12 commit 024a59c
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 1 deletion.
1 change: 1 addition & 0 deletions pkg/app/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ func (a *App) newAPIServer() (*http.Server, error) {
a.reg.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
a.reg.MustRegister(subscribeResponseReceivedCounter)
a.reg.MustRegister(subscribeResponseFailedCounter)
a.registerTargetMetrics()
go a.startClusterMetrics()
}
s := &http.Server{
Expand Down
2 changes: 1 addition & 1 deletion pkg/app/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func (a *App) StartCollector(ctx context.Context) {
case tErr := <-errChan:
if errors.Is(tErr.Err, io.EOF) {
a.Logger.Printf("target %q: subscription %s closed stream(EOF)", t.Config.Name, tErr.SubscriptionName)
} else {
} else {
subscribeResponseFailedCounter.WithLabelValues(t.Config.Name, tErr.SubscriptionName).Inc()
a.Logger.Printf("target %q: subscription %s rcv error: %v", t.Config.Name, tErr.SubscriptionName, tErr.Err)
}
Expand Down
69 changes: 69 additions & 0 deletions pkg/app/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package app
import (
"context"
"fmt"
"strings"
"time"

"github.com/prometheus/client_golang/prometheus"
Expand All @@ -35,6 +36,14 @@ var subscribeResponseFailedCounter = prometheus.NewCounterVec(prometheus.Counter
Help: "Total number of failed subscribe requests",
}, []string{"source", "subscription"})

// target metrics

// targetUPMetric is a gauge vector with a single "name" label (one series per
// target name). With the namespace, subsystem and name below it is exposed as
// gnmic_target_up. registerTargetMetrics registers it, initializes it to 0 for
// the configured targets, and periodically updates it based on each target's
// connection state.
var targetUPMetric = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "gnmic",
Subsystem: "target",
Name: "up",
Help: "Has value 1 if the gNMI connection to the target is established; otherwise, 0.",
}, []string{"name"})

// cluster
var clusterNumberOfLockedTargets = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "gnmic",
Expand All @@ -49,6 +58,66 @@ var clusterIsLeader = prometheus.NewGauge(prometheus.GaugeOpts{
Help: "Has value 1 if this gnmic instance is the cluster leader, 0 otherwise",
})

// registerTargetMetrics registers targetUPMetric with the app's registry,
// initializes the gauge to 0 for every configured target, and starts a
// background goroutine that refreshes the gauge every
// clusterMetricsUpdatePeriod until a.ctx is done.
//
// A registration failure is only logged; initialization and the refresh
// goroutine still run.
func (a *App) registerTargetMetrics() {
	if err := a.reg.Register(targetUPMetric); err != nil {
		a.Logger.Printf("failed to register target metric: %v", err)
	}

	// Create a series at 0 for each configured target before the first tick.
	a.configLock.RLock()
	for _, tc := range a.Config.Targets {
		targetUPMetric.WithLabelValues(tc.Name).Set(0)
	}
	a.configLock.RUnlock()

	// refresh recomputes the gauge for every configured target.
	refresh := func() {
		// owners maps a target name to the value stored under its lock key.
		// It is only populated when this instance is the leader; the value is
		// compared against this instance's name below.
		owners := make(map[string]string)
		if a.isLeader {
			prefix := fmt.Sprintf("gnmic/%s/targets", a.Config.ClusterName)
			// Bound the lookup to half the refresh period.
			listCtx, cancel := context.WithTimeout(a.ctx, clusterMetricsUpdatePeriod/2)
			locked, err := a.locker.List(listCtx, prefix)
			cancel()
			if err != nil {
				a.Logger.Printf("failed to get locked nodes key: %v", err)
			}
			for key, owner := range locked {
				owners[strings.TrimPrefix(key, prefix+"/")] = owner
			}
		}

		a.configLock.RLock()
		for _, tc := range a.Config.Targets {
			a.operLock.RLock()
			target, running := a.Targets[tc.Name]
			a.operLock.RUnlock()

			if !running {
				// A non-leader reports 0 for any configured target it is not
				// running. A leader does so only for targets whose lock value
				// equals its own instance name; other targets are left as is.
				if !a.isLeader || owners[tc.Name] == a.Config.Clustering.InstanceName {
					targetUPMetric.WithLabelValues(tc.Name).Set(0)
				}
				continue
			}

			// Only the IDLE and READY connection states count as up.
			up := 0.0
			if state := target.ConnState(); state == "IDLE" || state == "READY" {
				up = 1
			}
			targetUPMetric.WithLabelValues(tc.Name).Set(up)
		}
		a.configLock.RUnlock()
	}

	go func() {
		ticker := time.NewTicker(clusterMetricsUpdatePeriod)
		defer ticker.Stop()
		for {
			select {
			case <-a.ctx.Done():
				return
			case <-ticker.C:
				refresh()
			}
		}
	}()
}

func (a *App) startClusterMetrics() {
if a.Config.APIServer == nil || !a.Config.APIServer.EnableMetrics || a.Config.Clustering == nil {
return
Expand Down

0 comments on commit 024a59c

Please sign in to comment.