Skip to content

Commit

Permalink
compute: allow "instance" label to be set (behind a flag)
Browse files Browse the repository at this point in the history
Fixes #262.

*## What

Allow `instance` label name to be set via new `-compute.instance-label` flag.

*## Why

See #262 for the rationale and discussion.

*## How

* for each compute `Collector`, create new `collectorMetrics` type
  containing compute metrics (moved from _global var_), to let each
  `Collector` own an allocated instance obtained via
  `newCollectorMetrics(instanceLabel)`

* changing metrics from _global var_ to dynamically allocated object is
  needed for:
  * setting its label at runtime
  * allowing tests to be self-contained, else the 1st test to run would "own"
    setting `instanceLabel`

* there's a new `config.CommonConfig` type, currently only having a
  single field `ComputeInstanceLabel`, which is passed down from `cmd/`
  to each instantiated _Provider_, then to each compute _Collector_

*## Testing

Adapted testing with a couple `ComputeInstanceLabel="node"` changes,
although it should be improved to test changes to the label name.

--
Signed-off-by: JuanJo Ciarlante <[email protected]>
  • Loading branch information
jjo committed Jul 23, 2024
1 parent 0bc00ba commit a97d718
Show file tree
Hide file tree
Showing 13 changed files with 262 additions and 158 deletions.
11 changes: 8 additions & 3 deletions cmd/exporter/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,15 @@ import (
"time"
)

type CommonConfig struct {
ComputeInstanceLabel string
}

type Config struct {
Provider string
ProjectID string
Providers struct {
Provider string
ProjectID string
CommonConfig CommonConfig
Providers struct {
AWS struct {
Profile string
Region string
Expand Down
8 changes: 8 additions & 0 deletions cmd/exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (

func main() {
var cfg config.Config
commonFlags(&cfg)
providerFlags(flag.CommandLine, &cfg)
operationalFlags(&cfg)
flag.Parse()
Expand Down Expand Up @@ -78,6 +79,10 @@ func providerFlags(fs *flag.FlagSet, cfg *config.Config) {
flag.IntVar(&cfg.Providers.GCP.DefaultGCSDiscount, "gcp.default-discount", 19, "GCP default discount")
}

func commonFlags(cfg *config.Config) {
flag.StringVar(&cfg.CommonConfig.ComputeInstanceLabel, "compute.instance-label", "instance", "instance label name")
}

// operationalFlags is a helper method that is responsible for setting up the flags that are used to configure the operational aspects of the application.
// TODO: This should probably be moved over to the config package.
func operationalFlags(cfg *config.Config) {
Expand Down Expand Up @@ -169,13 +174,15 @@ func selectProvider(ctx context.Context, cfg *config.Config) (provider.Provider,
switch cfg.Provider {
case "azure":
return azure.New(ctx, &azure.Config{
CommonConfig: &cfg.CommonConfig,
Logger: cfg.Logger,
SubscriptionId: cfg.Providers.Azure.SubscriptionId,
Services: cfg.Providers.Azure.Services,
CollectorTimeout: cfg.Collector.Timeout,
})
case "aws":
return aws.New(ctx, &aws.Config{
CommonConfig: &cfg.CommonConfig,
Logger: cfg.Logger,
Region: cfg.Providers.AWS.Region,
Profile: cfg.Providers.AWS.Profile,
Expand All @@ -185,6 +192,7 @@ func selectProvider(ctx context.Context, cfg *config.Config) (provider.Provider,

case "gcp":
return google.New(&google.Config{
CommonConfig: &cfg.CommonConfig,
Logger: cfg.Logger,
ProjectId: cfg.ProjectID,
Region: cfg.Providers.GCP.Region,
Expand Down
3 changes: 3 additions & 0 deletions pkg/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/aws/aws-sdk-go-v2/service/pricing"
"github.com/prometheus/client_golang/prometheus"

"github.com/grafana/cloudcost-exporter/cmd/exporter/config"
ec2Collector "github.com/grafana/cloudcost-exporter/pkg/aws/ec2"

cloudcost_exporter "github.com/grafana/cloudcost-exporter"
Expand All @@ -28,6 +29,7 @@ type Config struct {
Region string
Profile string
ScrapeInterval time.Duration
CommonConfig *config.CommonConfig
Logger *slog.Logger
}

Expand Down Expand Up @@ -144,6 +146,7 @@ func New(ctx context.Context, config *Config) (*AWS, error) {
regionClientMap[*r.RegionName] = client
}
collector := ec2Collector.New(&ec2Collector.Config{
CommonConfig: config.CommonConfig,
Regions: regions.Regions,
RegionClients: regionClientMap,
Logger: logger,
Expand Down
64 changes: 38 additions & 26 deletions pkg/aws/ec2/ec2.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"golang.org/x/sync/errgroup"

cloudcostexporter "github.com/grafana/cloudcost-exporter"
"github.com/grafana/cloudcost-exporter/cmd/exporter/config"
ec2client "github.com/grafana/cloudcost-exporter/pkg/aws/services/ec2"
pricingClient "github.com/grafana/cloudcost-exporter/pkg/aws/services/pricing"
"github.com/grafana/cloudcost-exporter/pkg/provider"
Expand All @@ -29,26 +30,11 @@ var (
ErrGeneratePricingMap = errors.New("error generating pricing map")
)

var (
InstanceCPUHourlyCostDesc = prometheus.NewDesc(
prometheus.BuildFQName(cloudcostexporter.MetricPrefix, subsystem, "instance_cpu_usd_per_core_hour"),
"The cpu cost a ec2 instance in USD/(core*h)",
[]string{"instance", "region", "family", "machine_type", "cluster_name", "price_tier"},
nil,
)
InstanceMemoryHourlyCostDesc = prometheus.NewDesc(
prometheus.BuildFQName(cloudcostexporter.MetricPrefix, subsystem, "instance_memory_usd_per_gib_hour"),
"The memory cost of a ec2 instance in USD/(GiB*h)",
[]string{"instance", "region", "family", "machine_type", "cluster_name", "price_tier"},
nil,
)
InstanceTotalHourlyCostDesc = prometheus.NewDesc(
prometheus.BuildFQName(cloudcostexporter.MetricPrefix, subsystem, "instance_total_usd_per_hour"),
"The total cost of the ec2 instance in USD/h",
[]string{"instance", "region", "family", "machine_type", "cluster_name", "price_tier"},
nil,
)
)
type CollectorMetrics struct {
InstanceCPUHourlyCostDesc *prometheus.Desc
InstanceMemoryHourlyCostDesc *prometheus.Desc
InstanceTotalHourlyCostDesc *prometheus.Desc
}

// Collector is a prometheus collector that collects metrics from AWS EKS clusters.
type Collector struct {
Expand All @@ -58,23 +44,49 @@ type Collector struct {
pricingService pricingClient.Pricing
NextScrape time.Time
ec2RegionClients map[string]ec2client.EC2
metrics *CollectorMetrics
logger *slog.Logger
}

type Config struct {
CommonConfig *config.CommonConfig
ScrapeInterval time.Duration
Regions []ec2Types.Region
RegionClients map[string]ec2client.EC2
Logger *slog.Logger
}

func newCollectorMetrics(instanceLabel string) *CollectorMetrics {
return &CollectorMetrics{
InstanceCPUHourlyCostDesc: prometheus.NewDesc(
prometheus.BuildFQName(cloudcostexporter.MetricPrefix, subsystem, "instance_cpu_usd_per_core_hour"),
"The cpu cost a ec2 instance in USD/(core*h)",
[]string{instanceLabel, "region", "family", "machine_type", "cluster_name", "price_tier"},
nil,
),
InstanceMemoryHourlyCostDesc: prometheus.NewDesc(
prometheus.BuildFQName(cloudcostexporter.MetricPrefix, subsystem, "instance_memory_usd_per_gib_hour"),
"The memory cost of a ec2 instance in USD/(GiB*h)",
[]string{instanceLabel, "region", "family", "machine_type", "cluster_name", "price_tier"},
nil,
),
InstanceTotalHourlyCostDesc: prometheus.NewDesc(
prometheus.BuildFQName(cloudcostexporter.MetricPrefix, subsystem, "instance_total_usd_per_hour"),
"The total cost of the ec2 instance in USD/h",
[]string{instanceLabel, "region", "family", "machine_type", "cluster_name", "price_tier"},
nil,
),
}
}

// New creates an ec2 collector
func New(config *Config, ps pricingClient.Pricing) *Collector {
logger := config.Logger.With("logger", "ec2")
return &Collector{
ScrapeInterval: config.ScrapeInterval,
Regions: config.Regions,
ec2RegionClients: config.RegionClients,
metrics: newCollectorMetrics(config.CommonConfig.ComputeInstanceLabel),
logger: logger,
pricingService: ps,
pricingMap: NewStructuredPricingMap(),
Expand Down Expand Up @@ -202,9 +214,9 @@ func (c *Collector) emitMetricsFromChannel(reservationsCh chan []ec2Types.Reserv
clusterName,
pricetier,
}
ch <- prometheus.MustNewConstMetric(InstanceCPUHourlyCostDesc, prometheus.GaugeValue, price.Cpu, labelValues...)
ch <- prometheus.MustNewConstMetric(InstanceMemoryHourlyCostDesc, prometheus.GaugeValue, price.Ram, labelValues...)
ch <- prometheus.MustNewConstMetric(InstanceTotalHourlyCostDesc, prometheus.GaugeValue, price.Total, labelValues...)
ch <- prometheus.MustNewConstMetric(c.metrics.InstanceCPUHourlyCostDesc, prometheus.GaugeValue, price.Cpu, labelValues...)
ch <- prometheus.MustNewConstMetric(c.metrics.InstanceMemoryHourlyCostDesc, prometheus.GaugeValue, price.Ram, labelValues...)
ch <- prometheus.MustNewConstMetric(c.metrics.InstanceTotalHourlyCostDesc, prometheus.GaugeValue, price.Total, labelValues...)
}
}
}
Expand All @@ -215,9 +227,9 @@ func (c *Collector) CheckReadiness() bool {
}

func (c *Collector) Describe(ch chan<- *prometheus.Desc) error {
ch <- InstanceCPUHourlyCostDesc
ch <- InstanceMemoryHourlyCostDesc
ch <- InstanceTotalHourlyCostDesc
ch <- c.metrics.InstanceCPUHourlyCostDesc
ch <- c.metrics.InstanceMemoryHourlyCostDesc
ch <- c.metrics.InstanceTotalHourlyCostDesc
return nil
}

Expand Down
30 changes: 23 additions & 7 deletions pkg/aws/ec2/ec2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"

"github.com/grafana/cloudcost-exporter/cmd/exporter/config"
mockec2 "github.com/grafana/cloudcost-exporter/mocks/pkg/aws/services/ec2"
mockpricing "github.com/grafana/cloudcost-exporter/mocks/pkg/aws/services/pricing"
ec2client "github.com/grafana/cloudcost-exporter/pkg/aws/services/ec2"
Expand Down Expand Up @@ -114,19 +115,22 @@ func TestCollector_ListOnDemandPrices(t *testing.T) {

func TestNewCollector(t *testing.T) {
tests := map[string]struct {
instanceLabel string
region string
profile string
scrapeInternal time.Duration
ps pricingClient.Pricing
ec2s ec2client.EC2
}{
"Empty Region and profile should return a collector": {
instanceLabel: "instance",
region: "",
profile: "",
scrapeInternal: 0,
ps: nil,
},
"Region and profile should return a collector": {
instanceLabel: "instance",
region: "us-east-1",
profile: "default",
scrapeInternal: 0,
Expand All @@ -136,7 +140,8 @@ func TestNewCollector(t *testing.T) {
for name, tt := range tests {
t.Run(name, func(t *testing.T) {
collector := New(&Config{
Logger: logger,
CommonConfig: &config.CommonConfig{ComputeInstanceLabel: tt.instanceLabel},
Logger: logger,
}, tt.ps)
assert.NotNil(t, collector)
})
Expand All @@ -146,21 +151,24 @@ func TestNewCollector(t *testing.T) {
func TestCollector_Name(t *testing.T) {
t.Run("Name should return the same name as the subsystem const", func(t *testing.T) {
collector := New(&Config{
Logger: logger,
CommonConfig: &config.CommonConfig{ComputeInstanceLabel: "instance"},
Logger: logger,
}, nil)
assert.Equal(t, subsystem, collector.Name())
})
}

func TestCollector_Collect(t *testing.T) {
commonConfig := &config.CommonConfig{ComputeInstanceLabel: "node"}
regions := []ec2Types.Region{
{
RegionName: aws.String("us-east-1"),
},
}
t.Run("Collect should return no error", func(t *testing.T) {
collector := New(&Config{
Logger: logger,
CommonConfig: commonConfig,
Logger: logger,
}, nil)
ch := make(chan prometheus.Metric)
go func() {
Expand All @@ -178,8 +186,9 @@ func TestCollector_Collect(t *testing.T) {
return nil, assert.AnError
}).Times(1)
collector := New(&Config{
Regions: regions,
Logger: logger,
CommonConfig: commonConfig,
Regions: regions,
Logger: logger,
}, ps)
ch := make(chan prometheus.Metric)
err := collector.Collect(ch)
Expand All @@ -196,8 +205,9 @@ func TestCollector_Collect(t *testing.T) {
}, nil
}).Times(1)
collector := New(&Config{
Regions: regions,
Logger: logger,
CommonConfig: commonConfig,
Regions: regions,
Logger: logger,
}, ps)
ch := make(chan prometheus.Metric)
err := collector.Collect(ch)
Expand All @@ -224,6 +234,7 @@ func TestCollector_Collect(t *testing.T) {
regionClientMap[*r.RegionName] = ec2s
}
collector := New(&Config{
CommonConfig: commonConfig,
Regions: regions,
RegionClients: regionClientMap,
Logger: logger,
Expand Down Expand Up @@ -263,6 +274,7 @@ func TestCollector_Collect(t *testing.T) {
regionClientMap[*r.RegionName] = ec2s
}
collector := New(&Config{
CommonConfig: commonConfig,
Regions: regions,
RegionClients: regionClientMap,
Logger: logger,
Expand Down Expand Up @@ -358,6 +370,7 @@ func TestCollector_Collect(t *testing.T) {
regionClientMap[*r.RegionName] = ec2s
}
collector := New(&Config{
CommonConfig: commonConfig,
Regions: regions,
RegionClients: regionClientMap,
Logger: logger,
Expand All @@ -377,5 +390,8 @@ func TestCollector_Collect(t *testing.T) {
metrics = append(metrics, utils.ReadMetrics(metric))
}
assert.Len(t, metrics, 6)
// Assert we properly set the instanceLabel
assert.Equal(t, "ip-172-31-0-1.ec2.internal", metrics[0].Labels["node"])
assert.Equal(t, "", metrics[0].Labels["instance"])
})
}
Loading

0 comments on commit a97d718

Please sign in to comment.