Commit

Amazon Elasticache: enable the agent to automatically discover Elasticache instances and apply the necessary instrumentation using the redis or memcached exporters
def committed Jun 5, 2023
1 parent 6ee5c4c commit 89c6c2f
Showing 11 changed files with 776 additions and 174 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -2,11 +2,11 @@

Coroot-aws-agent is an open-source Prometheus exporter that gathers metrics from AWS services.

|Service|Description|
|-|-|
|RDS for Postgres (including Aurora)|autodiscovery, OS metrics from Enhanced Monitoring, Postgres metrics, metrics from logs|
|RDS for Mysql (including Aurora)|coming soon|
|EBS|coming soon|
| Service | Description |
|-------------------------------------|---------------------------------------------------------------------------------------------|
| RDS for Postgres (including Aurora) | autodiscovery, OS metrics based on Enhanced Monitoring, Postgres metrics, log-based metrics |
| RDS for MySQL (including Aurora)     | autodiscovery, OS metrics based on Enhanced Monitoring                                        |
| Elasticache | autodiscovery, Redis & Memcached metrics |

## Documentation

139 changes: 139 additions & 0 deletions elasticache/collector.go
@@ -0,0 +1,139 @@
package elasticache

import (
	"fmt"
	"net"
	"strconv"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/elasticache"
	"github.com/coroot/coroot-aws-agent/flags"
	"github.com/coroot/coroot-aws-agent/utils"
	"github.com/coroot/logger"
	"github.com/oliver006/redis_exporter/exporter"
	"github.com/prometheus/client_golang/prometheus"
	mcExporter "github.com/prometheus/memcached_exporter/pkg/exporter"
)

var (
	dInfo = utils.Desc("aws_elasticache_info", "Elasticache instance info",
		"region", "availability_zone", "endpoint", "ipv4", "port",
		"engine", "engine_version", "instance_type", "cluster_id",
	)
	dStatus = utils.Desc("aws_elasticache_status", "Status of the Elasticache instance", "status")
)

type Collector struct {
	sess *session.Session

	metricCollector prometheus.Collector
	cluster         elasticache.CacheCluster
	node            elasticache.CacheNode

	logger logger.Logger
}

func NewCollector(sess *session.Session, cluster *elasticache.CacheCluster, node *elasticache.CacheNode) (*Collector, error) {
	if node.Endpoint == nil || node.Endpoint.Address == nil {
		return nil, fmt.Errorf("endpoint is not defined")
	}
	c := &Collector{
		sess:    sess,
		cluster: *cluster,
		node:    *node,
		logger:  logger.NewKlog(aws.StringValue(cluster.CacheClusterId)),
	}

	c.startMetricCollector()
	return c, nil
}

func (c *Collector) update(cluster *elasticache.CacheCluster, n *elasticache.CacheNode) {
	// If the node's endpoint (address or port) changed, restart the engine-specific metric collector.
	if aws.Int64Value(c.node.Endpoint.Port) != aws.Int64Value(n.Endpoint.Port) || aws.StringValue(c.node.Endpoint.Address) != aws.StringValue(n.Endpoint.Address) {
		c.cluster = *cluster
		c.node = *n
		c.startMetricCollector()
	}
	c.cluster = *cluster
	c.node = *n
}

func (c *Collector) startMetricCollector() {
	switch aws.StringValue(c.cluster.Engine) {
	case "redis":
		url := fmt.Sprintf("redis://%s:%d", aws.StringValue(c.node.Endpoint.Address), aws.Int64Value(c.node.Endpoint.Port))
		opts := exporter.Options{
			Namespace:          "redis",
			ConfigCommandName:  "CONFIG",
			IsCluster:          false,
			ConnectionTimeouts: *flags.ElasticacheConnectTimeout,
			RedisMetricsOnly:   true,
		}
		if collector, err := exporter.NewRedisExporter(url, opts); err != nil {
			c.logger.Warning("failed to init redis collector:", err)
		} else {
			c.logger.Info("redis collector ->", url)
			c.metricCollector = collector
		}
	case "memcached":
		address := fmt.Sprintf("%s:%d", aws.StringValue(c.node.Endpoint.Address), aws.Int64Value(c.node.Endpoint.Port))
		c.metricCollector = mcExporter.New(
			address,
			*flags.ElasticacheConnectTimeout,
			&promLogger{c.logger},
			nil,
		)
		c.logger.Info("memcached collector ->", address)
	}
}

func (c *Collector) Close() {}

func (c *Collector) Collect(ch chan<- prometheus.Metric) {
	ch <- utils.Gauge(dStatus, 1, aws.StringValue(c.node.CacheNodeStatus))

	var ip string
	if a, err := net.ResolveIPAddr("", aws.StringValue(c.node.Endpoint.Address)); err != nil {
		c.logger.Warning(err)
	} else {
		ip = a.String()
	}

	cluster := aws.StringValue(c.cluster.ReplicationGroupId)
	if cluster == "" {
		cluster = aws.StringValue(c.cluster.CacheClusterId)
	}

	ch <- utils.Gauge(dInfo, 1,
		aws.StringValue(c.sess.Config.Region),
		aws.StringValue(c.node.CustomerAvailabilityZone),
		aws.StringValue(c.node.Endpoint.Address),
		ip,
		strconv.Itoa(int(aws.Int64Value(c.node.Endpoint.Port))),
		aws.StringValue(c.cluster.Engine),
		aws.StringValue(c.cluster.EngineVersion),
		aws.StringValue(c.cluster.CacheNodeType),
		cluster,
	)

	if c.metricCollector != nil {
		t := time.Now()
		c.metricCollector.Collect(ch)
		c.logger.Info("cache metrics collected in:", time.Since(t))
	}
}

func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
	ch <- dInfo
	ch <- dStatus
}

type promLogger struct {
	l logger.Logger
}

func (l *promLogger) Log(keyvals ...interface{}) error {
	l.l.Info(keyvals...)
	return nil
}
108 changes: 108 additions & 0 deletions elasticache/discovery.go
@@ -0,0 +1,108 @@
package elasticache

import (
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/elasticache"
	"github.com/coroot/coroot-aws-agent/flags"
	"github.com/coroot/coroot-aws-agent/utils"
	"github.com/coroot/logger"
	"github.com/prometheus/client_golang/prometheus"
)

type Discoverer struct {
	reg prometheus.Registerer

	awsSession *session.Session

	instances map[string]*Collector

	logger logger.Logger
}

func NewDiscoverer(reg prometheus.Registerer, awsSession *session.Session) *Discoverer {
	d := &Discoverer{
		reg:        reg,
		awsSession: awsSession,
		instances:  map[string]*Collector{},
		logger:     logger.NewKlog(""),
	}
	return d
}

func (d *Discoverer) Run() {
	api := elasticache.New(d.awsSession)

	if err := d.refresh(api); err != nil {
		d.logger.Warning(err)
	}

	ticker := time.Tick(*flags.DiscoveryInterval)

	for range ticker {
		if err := d.refresh(api); err != nil {
			d.logger.Warning(err)
		}
	}
}

func (d *Discoverer) refresh(api *elasticache.ElastiCache) error {
	t := time.Now()
	defer func() {
		d.logger.Info("elasticache clusters refreshed in:", time.Since(t))
	}()

	var clusters []*elasticache.CacheCluster
	var err error

	input := &elasticache.DescribeCacheClustersInput{}
	input.ShowCacheNodeInfo = aws.Bool(true)

	// DescribeCacheClusters is called twice, with and without the
	// ShowCacheClustersNotInReplicationGroups flag, so that both standalone
	// clusters and members of replication groups are discovered.
	for _, v := range []bool{false, true} {
		input.ShowCacheClustersNotInReplicationGroups = aws.Bool(v)
		output, err := api.DescribeCacheClusters(input)
		if err != nil {
			return err
		}
		clusters = append(clusters, output.CacheClusters...)
	}

	actualInstances := map[string]bool{}
	for _, cluster := range clusters {
		for _, node := range cluster.CacheNodes {
			id := aws.StringValue(cluster.CacheClusterId) + "/" + aws.StringValue(node.CacheNodeId)
			actualInstances[id] = true
			i, ok := d.instances[id]
			if !ok {
				d.logger.Info("new Elasticache instance found:", id)
				i, err = NewCollector(d.awsSession, cluster, node)
				if err != nil {
					d.logger.Warning("failed to init Elasticache collector:", err)
					continue
				}
				if err := d.wrappedReg(id).Register(i); err != nil {
					d.logger.Warning(err)
					continue
				}
				d.instances[id] = i
			}
			i.update(cluster, node)
		}
	}

	for id, i := range d.instances {
		if !actualInstances[id] {
			d.logger.Info("Elasticache instance no longer exists:", id)
			d.wrappedReg(id).Unregister(i)
			i.Close()
			delete(d.instances, id)
		}
	}
	return nil
}

func (d *Discoverer) wrappedReg(instanceId string) prometheus.Registerer {
	id := utils.IdWithRegion(aws.StringValue(d.awsSession.Config.Region), instanceId)
	return prometheus.WrapRegistererWith(prometheus.Labels{"ec_instance_id": id}, d.reg)
}
16 changes: 16 additions & 0 deletions flags/flags.go
@@ -0,0 +1,16 @@
package flags

import "gopkg.in/alecthomas/kingpin.v2"

var (
	AwsRegion                 = kingpin.Flag("aws-region", "AWS region (env: AWS_REGION)").Envar("AWS_REGION").Required().String()
	DiscoveryInterval         = kingpin.Flag("discovery-interval", "discovery interval").Default("60s").Duration()
	RdsDbUser                 = kingpin.Flag("rds-db-user", "RDS db user (env: RDS_DB_USER)").Envar("RDS_DB_USER").String()
	RdsDbPassword             = kingpin.Flag("rds-db-password", "RDS db password (env: RDS_DB_PASSWORD)").Envar("RDS_DB_PASSWORD").String()
	RdsDbConnectTimeout       = kingpin.Flag("rds-db-connect-timeout", "RDS db connect timeout").Default("1s").Duration()
	RdsDbQueryTimeout         = kingpin.Flag("rds-db-query-timeout", "RDS db query timeout").Default("30s").Duration()
	RdsLogsScrapeInterval     = kingpin.Flag("rds-logs-scrape-interval", "RDS logs scrape interval (0 to disable)").Default("30s").Duration()
	DbScrapeInterval          = kingpin.Flag("db-scrape-interval", "How often to scrape DB system views").Default("30s").Duration()
	ElasticacheConnectTimeout = kingpin.Flag("ec-connect-timeout", "Elasticache connect timeout").Default("1s").Duration()
	ListenAddress             = kingpin.Flag("listen-address", `Listen address (env: LISTEN_ADDRESS) - "<ip>:<port>" or ":<port>".`).Envar("LISTEN_ADDRESS").Default("0.0.0.0:80").String()
)
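
The commit itself does not show how these flags, the ElastiCache Discoverer, and a metrics endpoint are wired together in the agent's main package. Below is a minimal, hypothetical sketch of that wiring (the main package, its use of session.NewSession, and the promhttp handler are assumptions for illustration, not part of this diff):

```go
package main

import (
	"log"
	"net/http"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/coroot/coroot-aws-agent/elasticache"
	"github.com/coroot/coroot-aws-agent/flags"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"gopkg.in/alecthomas/kingpin.v2"
)

func main() {
	kingpin.Parse() // populates flags.AwsRegion, flags.DiscoveryInterval, flags.ListenAddress, etc.

	// Hypothetical wiring; the agent's actual main package is not part of this commit.
	sess, err := session.NewSession(&aws.Config{Region: flags.AwsRegion})
	if err != nil {
		log.Fatalln("failed to create AWS session:", err)
	}

	reg := prometheus.NewRegistry()
	go elasticache.NewDiscoverer(reg, sess).Run() // Run blocks, so discovery runs in its own goroutine

	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
	log.Fatal(http.ListenAndServe(*flags.ListenAddress, nil))
}
```

With wiring along these lines, each discovered node's collector is registered on the shared registry under its own `ec_instance_id` label (see `wrappedReg` in discovery.go above), so all ElastiCache metrics are exposed from a single `/metrics` endpoint.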
33 changes: 19 additions & 14 deletions go.mod
@@ -7,29 +7,34 @@ require (
github.com/coroot/coroot-pg-agent v1.2.2
github.com/coroot/logger v1.0.0
github.com/coroot/logparser v1.0.5
github.com/prometheus/client_golang v1.11.1
github.com/oliver006/redis_exporter v1.50.0
github.com/prometheus/client_golang v1.15.1
github.com/prometheus/memcached_exporter v0.13.0
gopkg.in/alecthomas/kingpin.v2 v2.2.6
k8s.io/klog/v2 v2.20.0
)

require (
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver v3.5.1+incompatible // indirect
github.com/cespare/xxhash/v2 v2.1.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/dustin/go-humanize v1.0.0 // indirect
github.com/go-kit/log v0.2.1 // indirect
github.com/go-logfmt/logfmt v0.6.0 // indirect
github.com/go-logr/logr v1.0.0 // indirect
github.com/golang/protobuf v1.5.0 // indirect
github.com/google/go-cmp v0.5.6 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/gomodule/redigo v1.8.9 // indirect
github.com/grobie/gomemcache v0.0.0-20230213081705-239240bbc445 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/lib/pq v1.10.3 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.26.0 // indirect
github.com/prometheus/procfs v0.6.0 // indirect
golang.org/x/sys v0.1.0 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/protobuf v1.26.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
k8s.io/klog/v2 v2.20.0 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/mna/redisc v1.3.2 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.10.1 // indirect
github.com/sirupsen/logrus v1.9.0 // indirect
golang.org/x/sys v0.8.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
)