From 219e14579f0c0961f1a2ed9ca1c4191f978475b6 Mon Sep 17 00:00:00 2001
From: Robert Fratto
Date: Thu, 21 Mar 2024 10:55:44 -0400
Subject: [PATCH] all: remove dead code from static mode

While grafana/alloy#15 removed static mode and the operator from being
reachable from the binary, all static mode code remained in the codebase
to be removed later. This commit removes all unreachable code; what
remains is constrained to config structures which are still used for
config conversion.

This commit does accidentally remove code that changed the order of
components emitted by static mode conversion. This isn't a big deal, and
the converted configs are still correct, so that code is being left out
rather than hunted down. (It is likely code that implemented an
interface but wasn't called directly.)
---
 Makefile | 12 +-
 .../testdata-v2/integrations_v2.river | 94 +--
 .../testdata-v2/unsupported.river | 8 +-
 .../staticconvert/testdata/integrations.river | 54 +-
 .../staticconvert/testdata/integrations.yaml | 2 +-
 internal/static/agentctl/sync.go | 136 ----
 internal/static/agentctl/sync_test.go | 137 ----
 .../static/agentctl/testdata/agent-1.yaml | 12 -
 .../static/agentctl/testdata/agent-2.yaml | 12 -
 .../static/agentctl/testdata/agent-3.yaml | 12 -
 internal/static/agentproto/agent.pb.go | 416 -----------
 internal/static/agentproto/agent.proto | 20 -
 internal/static/agentproto/func.go | 21 -
 internal/static/agentproto/gen.go | 3 -
 internal/static/client/client.go | 179 -----
 .../agent_management_remote_config_test.go | 364 ----------
 internal/static/config/agentmanagement.go | 308 --------
 .../config/agentmanagement_remote_config.go | 179 -----
 .../static/config/agentmanagement_test.go | 460 ------------
 internal/static/config/config.go | 130 ----
 internal/static/config/config_test.go | 71 +-
 internal/static/config/integrations.go | 65 --
 internal/static/config/integrations_test.go | 69 --
 internal/static/config/remote_config.go | 145 ----
 internal/static/config/remote_config_test.go | 155 ----
 .../integrations/cadvisor/cadvisor_stub.go | 25 +-
 internal/static/integrations/manager.go | 418 -----------
 internal/static/integrations/manager_test.go | 433 ------------
 .../static/integrations/stub_integration.go | 27 -
 .../app_agent_receiver/app_agent_receiver.go | 180 +----
 .../app_agent_receiver_test.go | 169 -----
 .../v2/app_agent_receiver/handler.go | 126 ----
 .../v2/app_agent_receiver/handler_test.go | 356 ----------
 .../v2/app_agent_receiver/logs_exporter.go | 140 ----
 .../app_agent_receiver/logs_exporter_test.go | 120 ----
 .../v2/app_agent_receiver/payload.go | 420 -----------
 .../v2/app_agent_receiver/payload_test.go | 142 ----
 .../receiver_metrics_exporter.go | 61 --
 .../receiver_metrics_test.go | 141 ----
 .../v2/app_agent_receiver/sourcemaps.go | 357 ----------
 .../v2/app_agent_receiver/sourcemaps_test.go | 495 -------------
 .../v2/app_agent_receiver/testdata/foo.js | 39 --
 .../v2/app_agent_receiver/testdata/foo.js.map | 1 -
 .../app_agent_receiver/testdata/payload.json | 330 ---------
 .../testdata/payload_2.json | 393 -----------
 .../v2/app_agent_receiver/traces_exporter.go | 41 --
 .../v2/app_agent_receiver/traces_test.go | 53 --
 .../v2/app_agent_receiver/utils.go | 84 ---
 .../v2/app_agent_receiver/utils_test.go | 36 -
 .../integrations/v2/autoscrape/appender.go | 42 --
 .../integrations/v2/autoscrape/autoscrape.go | 266 -------
 .../v2/autoscrape/autoscrape_test.go | 118 ----
 internal/static/integrations/v2/controller.go | 444 ------------
 .../v2/controller_httpintegration_test.go | 259 --------
 .../v2/controller_metricsintegration_test.go | 184 -----
 .../static/integrations/v2/controller_test.go | 286 --------
 .../v2/controller_updateintegration_test.go | 79 ---
 .../v2/eventhandler/eventhandler.go | 472 -------------
 .../v2/eventhandler/eventhandler_test.go | 54 --
 .../v2/eventhandler/integration.go | 17 +-
 .../eventhandler/testdata/eventhandler.cache | 1 -
 .../static/integrations/v2/integrations.go | 11 -
 internal/static/integrations/v2/subsystem.go | 180 -----
 .../static/integrations/v2/targetgroup.go | 28 -
 internal/static/integrations/v2/workers.go | 122 ----
 internal/static/logs/http.go | 84 ---
 internal/static/logs/http_test.go | 177 -----
 internal/static/logs/logs.go | 222 ------
 internal/static/logs/logs_test.go | 206 ------
 internal/static/metrics/agent.go | 268 +------
 internal/static/metrics/agent_test.go | 226 ------
 internal/static/metrics/cleaner.go | 271 -------
 internal/static/metrics/cleaner_test.go | 146 ----
 .../static/metrics/cluster/client/client.go | 50 --
 internal/static/metrics/cluster/cluster.go | 179 -----
 .../static/metrics/cluster/config_watcher.go | 340 ---------
 .../metrics/cluster/config_watcher_test.go | 267 -------
 .../static/metrics/cluster/configapi/types.go | 73 --
 internal/static/metrics/cluster/node.go | 381 ----------
 internal/static/metrics/cluster/node_test.go | 223 ------
 internal/static/metrics/cluster/validation.go | 150 ----
 .../static/metrics/cluster/validation_test.go | 118 ----
 internal/static/metrics/http.go | 166 -----
 internal/static/metrics/http_test.go | 142 ----
 .../metrics/instance/configstore/api.go | 268 -------
 .../metrics/instance/configstore/api_test.go | 408 -----------
 .../metrics/instance/configstore/codec.go | 65 --
 .../instance/configstore/codec_test.go | 41 --
 .../metrics/instance/configstore/errors.go | 27 -
 .../metrics/instance/configstore/mock.go | 74 --
 .../metrics/instance/configstore/remote.go | 471 -------------
 .../instance/configstore/remote_test.go | 271 -------
 .../metrics/instance/configstore/store.go | 49 --
 .../metrics/instance/configstore/unique.go | 35 -
 internal/static/metrics/instance/errors.go | 44 --
 .../static/metrics/instance/group_manager.go | 358 ----------
 .../metrics/instance/group_manager_test.go | 446 ------------
 .../static/metrics/instance/host_filter.go | 238 -------
 .../metrics/instance/host_filter_test.go | 201 ------
 internal/static/metrics/instance/instance.go | 661 ------------------
 .../instance/instance_integration_test.go | 281 --------
 .../static/metrics/instance/instance_test.go | 250 -------
 internal/static/metrics/instance/manager.go | 379 ----------
 .../static/metrics/instance/manager_test.go | 158 -----
 .../static/metrics/instance/modal_manager.go | 178 -----
 internal/static/metrics/instance/noop.go | 49 --
 internal/static/server/logger.go | 118 ----
 internal/static/server/logger_test.go | 58 --
 internal/static/server/logger_windows.go | 110 ---
 internal/static/server/server.go | 429 ------------
 internal/static/server/server_test.go | 193 -----
 internal/static/server/signal_context.go | 41 --
 internal/static/server/tls.go | 152 ----
 internal/static/server/tls_certstore_stub.go | 6 -
 .../static/server/tls_certstore_windows.go | 60 --
 internal/static/server/tls_test.go | 68 --
 .../static/supportbundle/supportbundle.go | 235 -------
 .../automaticloggingprocessor.go | 209 +----
 .../automaticloggingprocessor_test.go | 238 -------
 internal/static/traces/instance.go | 194 -----
 .../traces/remotewriteexporter/exporter.go | 296 +-------
 .../remotewriteexporter/exporter_test.go | 183 -----
 internal/static/traces/traces.go | 111 ---
 internal/static/traces/traces_test.go | 193 -----
 internal/util/log/log.go | 114 ---
 internal/util/otel_feature_gate.go | 55 +-
 internal/util/otel_feature_gate_test.go | 4 -
 internal/util/sanitize.go | 10 -
 internal/util/structwalk/structwalk.go | 77 --
 internal/util/structwalk/structwalk_test.go | 63 --
 internal/util/subset/subset.go | 120 ----
 internal/util/subset/subset_test.go | 92 ---
 internal/util/unregisterer.go | 63 --
 133 files changed, 160 insertions(+), 22187 deletions(-)
 delete mode 100644 internal/static/agentctl/sync.go
 delete mode 100644 internal/static/agentctl/sync_test.go
 delete mode 100644 internal/static/agentctl/testdata/agent-1.yaml
 delete mode 100644 internal/static/agentctl/testdata/agent-2.yaml
 delete mode 100644 internal/static/agentctl/testdata/agent-3.yaml
 delete mode 100644 internal/static/agentproto/agent.pb.go
 delete mode 100644 internal/static/agentproto/agent.proto
 delete mode 100644 internal/static/agentproto/func.go
 delete mode 100644 internal/static/agentproto/gen.go
 delete mode 100644 internal/static/client/client.go
 delete mode 100644 internal/static/config/agent_management_remote_config_test.go
 delete mode 100644 internal/static/config/agentmanagement_remote_config.go
 delete mode 100644 internal/static/config/remote_config.go
 delete mode 100644 internal/static/config/remote_config_test.go
 delete mode 100644 internal/static/integrations/manager_test.go
 delete mode 100644 internal/static/integrations/stub_integration.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/app_agent_receiver_test.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/handler.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/handler_test.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/logs_exporter.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/logs_exporter_test.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/payload.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/payload_test.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/receiver_metrics_exporter.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/receiver_metrics_test.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/sourcemaps.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/sourcemaps_test.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/testdata/foo.js
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/testdata/foo.js.map
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/testdata/payload.json
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/testdata/payload_2.json
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/traces_exporter.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/traces_test.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/utils.go
 delete mode 100644 internal/static/integrations/v2/app_agent_receiver/utils_test.go
 delete mode 100644 internal/static/integrations/v2/autoscrape/appender.go
 delete mode 100644 internal/static/integrations/v2/autoscrape/autoscrape_test.go
 delete mode 100644 internal/static/integrations/v2/controller.go
 delete mode 100644 internal/static/integrations/v2/controller_httpintegration_test.go
 delete mode 100644 internal/static/integrations/v2/controller_metricsintegration_test.go
 delete mode 100644 internal/static/integrations/v2/controller_test.go
 delete mode 100644 internal/static/integrations/v2/controller_updateintegration_test.go
 delete mode 100644 internal/static/integrations/v2/eventhandler/eventhandler.go
 delete mode 100644 internal/static/integrations/v2/eventhandler/eventhandler_test.go
 delete mode 100644 internal/static/integrations/v2/eventhandler/testdata/eventhandler.cache
 delete mode 100644 internal/static/integrations/v2/targetgroup.go
 delete mode 100644 internal/static/integrations/v2/workers.go
 delete mode 100644 internal/static/logs/http.go
 delete mode 100644 internal/static/logs/http_test.go
 delete mode 100644 internal/static/logs/logs_test.go
 delete mode 100644 internal/static/metrics/cleaner.go
 delete mode 100644 internal/static/metrics/cleaner_test.go
 delete mode 100644 internal/static/metrics/cluster/cluster.go
 delete mode 100644 internal/static/metrics/cluster/config_watcher.go
 delete mode 100644 internal/static/metrics/cluster/config_watcher_test.go
 delete mode 100644 internal/static/metrics/cluster/configapi/types.go
 delete mode 100644 internal/static/metrics/cluster/node.go
 delete mode 100644 internal/static/metrics/cluster/node_test.go
 delete mode 100644 internal/static/metrics/cluster/validation.go
 delete mode 100644 internal/static/metrics/cluster/validation_test.go
 delete mode 100644 internal/static/metrics/http.go
 delete mode 100644 internal/static/metrics/http_test.go
 delete mode 100644 internal/static/metrics/instance/configstore/api.go
 delete mode 100644 internal/static/metrics/instance/configstore/api_test.go
 delete mode 100644 internal/static/metrics/instance/configstore/codec.go
 delete mode 100644 internal/static/metrics/instance/configstore/codec_test.go
 delete mode 100644 internal/static/metrics/instance/configstore/errors.go
 delete mode 100644 internal/static/metrics/instance/configstore/mock.go
 delete mode 100644 internal/static/metrics/instance/configstore/remote.go
 delete mode 100644 internal/static/metrics/instance/configstore/remote_test.go
 delete mode 100644 internal/static/metrics/instance/configstore/store.go
 delete mode 100644 internal/static/metrics/instance/configstore/unique.go
 delete mode 100644 internal/static/metrics/instance/errors.go
 delete mode 100644 internal/static/metrics/instance/group_manager.go
 delete mode 100644 internal/static/metrics/instance/group_manager_test.go
 delete mode 100644 internal/static/metrics/instance/host_filter.go
 delete mode 100644 internal/static/metrics/instance/host_filter_test.go
 delete mode 100644 internal/static/metrics/instance/instance_integration_test.go
 delete mode 100644 internal/static/metrics/instance/manager.go
 delete mode 100644 internal/static/metrics/instance/manager_test.go
 delete mode 100644 internal/static/metrics/instance/noop.go
 delete mode 100644 internal/static/server/logger.go
 delete mode 100644 internal/static/server/logger_test.go
 delete mode 100644 internal/static/server/logger_windows.go
 delete mode 100644 internal/static/server/server_test.go
 delete mode 100644 internal/static/server/signal_context.go
 delete mode 100644 internal/static/server/tls_test.go
 delete mode 100644 internal/static/supportbundle/supportbundle.go
 delete mode 100644 internal/static/traces/instance.go
 delete mode 100644 internal/static/traces/remotewriteexporter/exporter_test.go
 delete mode 100644 internal/static/traces/traces.go
 delete mode 100644 internal/static/traces/traces_test.go
 delete mode 100644
internal/util/sanitize.go delete mode 100644 internal/util/structwalk/structwalk.go delete mode 100644 internal/util/structwalk/structwalk_test.go delete mode 100644 internal/util/subset/subset.go delete mode 100644 internal/util/subset/subset_test.go delete mode 100644 internal/util/unregisterer.go diff --git a/Makefile b/Makefile index 47ce69da76..f844732d9a 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,6 @@ ## generate-drone Generate the Drone YAML from Jsonnet. ## generate-helm-docs Generate Helm chart documentation. ## generate-helm-tests Generate Helm chart tests. -## generate-protos Generate protobuf files. ## generate-ui Generate the UI assets. ## generate-versioned-files Generate versioned files. ## @@ -219,8 +218,8 @@ agent-boringcrypto-image: # Targets for generating assets # -.PHONY: generate generate-drone generate-helm-docs generate-helm-tests generate-protos generate-ui generate-versioned-files -generate: generate-drone generate-helm-docs generate-helm-tests generate-protos generate-ui generate-versioned-files generate-docs +.PHONY: generate generate-drone generate-helm-docs generate-helm-tests generate-ui generate-versioned-files +generate: generate-drone generate-helm-docs generate-helm-tests generate-ui generate-versioned-files generate-docs generate-drone: drone jsonnet -V BUILD_IMAGE_VERSION=$(BUILD_IMAGE_VERSION) --stream --format --source .drone/drone.jsonnet --target .drone/drone.yml @@ -239,13 +238,6 @@ else bash ./operations/helm/scripts/rebuild-tests.sh endif -generate-protos: -ifeq ($(USE_CONTAINER),1) - $(RERUN_IN_CONTAINER) -else - go generate ./internal/static/agentproto/ -endif - generate-ui: ifeq ($(USE_CONTAINER),1) $(RERUN_IN_CONTAINER) diff --git a/internal/converter/internal/staticconvert/testdata-v2/integrations_v2.river b/internal/converter/internal/staticconvert/testdata-v2/integrations_v2.river index f7c22ade64..b8a1b67208 100644 --- a/internal/converter/internal/staticconvert/testdata-v2/integrations_v2.river +++ b/internal/converter/internal/staticconvert/testdata-v2/integrations_v2.river @@ -21,6 +21,32 @@ logging { format = "json" } +faro.receiver "integrations_app_agent_receiver" { + extra_log_labels = {} + + server { + listen_address = "localhost" + listen_port = 55678 + max_allowed_payload_size = "4MiB786KiB832B" + + rate_limiting { + enabled = true + rate = 100 + burst_size = 50 + } + } + + sourcemaps { + download_from_origins = ["*"] + download_timeout = "1s" + } + + output { + logs = [loki.write.logs_log_config.receiver] + traces = [] + } +} + loki.relabel "integrations_eventhandler" { forward_to = [loki.write.logs_log_config.receiver] @@ -237,27 +263,6 @@ prometheus.scrape "integrations_elasticsearch" { job_name = "integrations/elasticsearch" } -prometheus.exporter.gcp "integrations_gcp_exporter" { - project_ids = [""] - metrics_prefixes = ["loadbalancing.googleapis.com/https/request_bytes_count", "loadbalancing.googleapis.com/https/total_latencies"] - extra_filters = ["loadbalancing.googleapis.com:resource.labels.backend_target_name=\"sample-value\""] -} - -discovery.relabel "integrations_gcp" { - targets = prometheus.exporter.gcp.integrations_gcp_exporter.targets - - rule { - target_label = "job" - replacement = "integrations/gcp" - } -} - -prometheus.scrape "integrations_gcp" { - targets = discovery.relabel.integrations_gcp.output - forward_to = [prometheus.remote_write.metrics_default.receiver] - job_name = "integrations/gcp" -} - prometheus.exporter.github "integrations_github_exporter" { repositories = ["grafana/agent", 
"grafana/agent-modules"] api_token = "ABCDEFGH-1234-ABCD-1234-ABCDEFGHIJKL" @@ -680,32 +685,6 @@ prometheus.scrape "integrations_apache2" { job_name = "integrations/apache2" } -faro.receiver "integrations_app_agent_receiver" { - extra_log_labels = {} - - server { - listen_address = "localhost" - listen_port = 55678 - max_allowed_payload_size = "4MiB786KiB832B" - - rate_limiting { - enabled = true - rate = 100 - burst_size = 50 - } - } - - sourcemaps { - download_from_origins = ["*"] - download_timeout = "1s" - } - - output { - logs = [loki.write.logs_log_config.receiver] - traces = [] - } -} - prometheus.exporter.blackbox "integrations_blackbox" { config = "modules:\n http_2xx:\n prober: http\n timeout: 5s\n http:\n method: POST\n headers:\n Content-Type: application/json\n body: '{}'\n preferred_ip_protocol: ip4\n" @@ -762,3 +741,24 @@ prometheus.scrape "integrations_snmp" { forward_to = [prometheus.remote_write.metrics_default.receiver] job_name = "integrations/snmp" } + +prometheus.exporter.gcp "integrations_gcp_exporter" { + project_ids = [""] + metrics_prefixes = ["loadbalancing.googleapis.com/https/request_bytes_count", "loadbalancing.googleapis.com/https/total_latencies"] + extra_filters = ["loadbalancing.googleapis.com:resource.labels.backend_target_name=\"sample-value\""] +} + +discovery.relabel "integrations_gcp" { + targets = prometheus.exporter.gcp.integrations_gcp_exporter.targets + + rule { + target_label = "job" + replacement = "integrations/gcp" + } +} + +prometheus.scrape "integrations_gcp" { + targets = discovery.relabel.integrations_gcp.output + forward_to = [prometheus.remote_write.metrics_default.receiver] + job_name = "integrations/gcp" +} diff --git a/internal/converter/internal/staticconvert/testdata-v2/unsupported.river b/internal/converter/internal/staticconvert/testdata-v2/unsupported.river index c9585a88c5..c854f1bd9d 100644 --- a/internal/converter/internal/staticconvert/testdata-v2/unsupported.river +++ b/internal/converter/internal/staticconvert/testdata-v2/unsupported.river @@ -16,10 +16,6 @@ loki.write "logs_log_config" { external_labels = {} } -loki.source.kubernetes_events "integrations_eventhandler" { - forward_to = [loki.write.logs_log_config.receiver] -} - faro.receiver "integrations_app_agent_receiver" { extra_log_labels = {} @@ -45,3 +41,7 @@ faro.receiver "integrations_app_agent_receiver" { traces = [] } } + +loki.source.kubernetes_events "integrations_eventhandler" { + forward_to = [loki.write.logs_log_config.receiver] +} diff --git a/internal/converter/internal/staticconvert/testdata/integrations.river b/internal/converter/internal/staticconvert/testdata/integrations.river index 201f5e99e1..0c7bdaee61 100644 --- a/internal/converter/internal/staticconvert/testdata/integrations.river +++ b/internal/converter/internal/staticconvert/testdata/integrations.river @@ -324,33 +324,6 @@ prometheus.scrape "integrations_elasticsearch_exporter" { } } -prometheus.exporter.gcp "integrations_gcp_exporter" { - project_ids = [""] - metrics_prefixes = ["loadbalancing.googleapis.com/https/request_bytes_count", "loadbalancing.googleapis.com/https/total_latencies"] - extra_filters = ["loadbalancing.googleapis.com:resource.labels.backend_target_name=\"sample-value\""] -} - -discovery.relabel "integrations_gcp_exporter" { - targets = prometheus.exporter.gcp.integrations_gcp_exporter.targets - - rule { - target_label = "job" - replacement = "integrations/gcp_exporter" - } -} - -prometheus.scrape "integrations_gcp_exporter" { - targets = 
discovery.relabel.integrations_gcp_exporter.output - forward_to = [prometheus.remote_write.integrations.receiver] - job_name = "integrations/gcp_exporter" - - tls_config { - ca_file = "/something7.cert" - cert_file = "/something8.cert" - key_file = "/something9.cert" - } -} - prometheus.exporter.github "integrations_github_exporter" { repositories = ["grafana/agent", "grafana/agent-modules"] api_token = "ABCDEFGH-1234-ABCD-1234-ABCDEFGHIJKL" @@ -780,3 +753,30 @@ prometheus.scrape "integrations_statsd_exporter" { key_file = "/something9.cert" } } + +prometheus.exporter.gcp "integrations_gcp_exporter" { + project_ids = [""] + metrics_prefixes = ["loadbalancing.googleapis.com/https/request_bytes_count", "loadbalancing.googleapis.com/https/total_latencies"] + extra_filters = ["loadbalancing.googleapis.com:resource.labels.backend_target_name=\"sample-value\""] +} + +discovery.relabel "integrations_gcp_exporter" { + targets = prometheus.exporter.gcp.integrations_gcp_exporter.targets + + rule { + target_label = "job" + replacement = "integrations/gcp_exporter" + } +} + +prometheus.scrape "integrations_gcp_exporter" { + targets = discovery.relabel.integrations_gcp_exporter.output + forward_to = [prometheus.remote_write.integrations.receiver] + job_name = "integrations/gcp_exporter" + + tls_config { + ca_file = "/something7.cert" + cert_file = "/something8.cert" + key_file = "/something9.cert" + } +} diff --git a/internal/converter/internal/staticconvert/testdata/integrations.yaml b/internal/converter/internal/staticconvert/testdata/integrations.yaml index ced11cf6d9..337eaa01cf 100644 --- a/internal/converter/internal/staticconvert/testdata/integrations.yaml +++ b/internal/converter/internal/staticconvert/testdata/integrations.yaml @@ -148,7 +148,7 @@ integrations: scrape_integration: true postgres_exporter: enabled: true - data_source_names: + data_source_names: - postgres://postgres:password@localhost:5432/postgres?sslmode=disable relabel_configs: - source_labels: [__address__] diff --git a/internal/static/agentctl/sync.go b/internal/static/agentctl/sync.go deleted file mode 100644 index 3246405921..0000000000 --- a/internal/static/agentctl/sync.go +++ /dev/null @@ -1,136 +0,0 @@ -package agentctl - -import ( - "context" - "errors" - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/client" - "github.com/grafana/agent/internal/static/metrics/instance" -) - -// ConfigSync loads YAML files from a directory and syncs them to the -// provided PrometheusClient API. All YAML files will be synced and -// must be valid. -// -// The base name of the YAML file (i.e., without the file extension) -// is used as the config name. -// -// ConfigSync will completely overwrite the set of active configs -// present in the provided PrometheusClient - configs present in the -// API but not in the directory will be deleted. 
-func ConfigSync(logger log.Logger, cli client.PrometheusClient, dir string, dryRun bool) error { - if logger == nil { - logger = log.NewNopLogger() - } - - ctx := context.Background() - cfgs, err := ConfigsFromDirectory(dir) - if err != nil { - return err - } - - if dryRun { - level.Info(logger).Log("msg", "config files validated successfully") - return nil - } - - uploaded := make(map[string]struct{}, len(cfgs)) - var hadErrors bool - - for _, cfg := range cfgs { - level.Info(logger).Log("msg", "uploading config", "name", cfg.Name) - err := cli.PutConfiguration(ctx, cfg.Name, cfg) - if err != nil { - level.Error(logger).Log("msg", "failed to upload config", "name", cfg.Name, "err", err) - hadErrors = true - } - uploaded[cfg.Name] = struct{}{} - } - - existing, err := cli.ListConfigs(ctx) - if err != nil { - return fmt.Errorf("could not list configs: %w", err) - } - - // Delete configs from the existing API list that we didn't upload. - for _, existing := range existing.Configs { - if _, existsLocally := uploaded[existing]; !existsLocally { - level.Info(logger).Log("msg", "deleting config", "name", existing) - err := cli.DeleteConfiguration(ctx, existing) - if err != nil { - level.Error(logger).Log("msg", "failed to delete outdated config", "name", existing, "err", err) - hadErrors = true - } - } - } - - if hadErrors { - return errors.New("one or more configurations failed to be modified; check the logs for more details") - } - - return nil -} - -// ConfigsFromDirectory parses all YAML files from a directory and -// loads each as an instance.Config. -func ConfigsFromDirectory(dir string) ([]*instance.Config, error) { - var files []string - err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if info.IsDir() { - if dir == path { - return nil - } - return filepath.SkipDir - } - - if strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml") { - files = append(files, path) - } - return nil - }) - if err != nil { - return nil, err - } - - var configs []*instance.Config - for _, file := range files { - cfg, err := configFromFile(file) - if err != nil { - return nil, err - } - configs = append(configs, cfg) - } - - return configs, nil -} - -func configFromFile(path string) (*instance.Config, error) { - var ( - fileName = filepath.Base(path) - configName = strings.TrimSuffix(fileName, filepath.Ext(fileName)) - ) - - f, err := os.Open(path) - if f != nil { - defer f.Close() - } - if err != nil { - return nil, err - } - - cfg, err := instance.UnmarshalConfig(f) - if err != nil { - return nil, err - } - cfg.Name = configName - return cfg, nil -} diff --git a/internal/static/agentctl/sync_test.go b/internal/static/agentctl/sync_test.go deleted file mode 100644 index 8cd490256c..0000000000 --- a/internal/static/agentctl/sync_test.go +++ /dev/null @@ -1,137 +0,0 @@ -package agentctl - -import ( - "context" - "errors" - "testing" - - "github.com/grafana/agent/internal/static/metrics/cluster/configapi" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/stretchr/testify/require" -) - -func TestConfigSync_EmptyStore(t *testing.T) { - cli := &mockFuncPromClient{} - cli.ListConfigsFunc = func(_ context.Context) (*configapi.ListConfigurationsResponse, error) { - return &configapi.ListConfigurationsResponse{}, nil - } - - var putConfigs []string - cli.PutConfigurationFunc = func(_ context.Context, name string, _ *instance.Config) error { - putConfigs = append(putConfigs, name) - return nil - } - - err := 
ConfigSync(nil, cli, "./testdata", false) - require.NoError(t, err) - - expect := []string{ - "agent-1", - "agent-2", - "agent-3", - } - require.Equal(t, expect, putConfigs) -} - -func TestConfigSync_PrepopulatedStore(t *testing.T) { - cli := &mockFuncPromClient{} - cli.ListConfigsFunc = func(_ context.Context) (*configapi.ListConfigurationsResponse, error) { - return &configapi.ListConfigurationsResponse{ - Configs: []string{"delete-a", "agent-1", "delete-b", "delete-c"}, - }, nil - } - - var putConfigs []string - cli.PutConfigurationFunc = func(_ context.Context, name string, _ *instance.Config) error { - putConfigs = append(putConfigs, name) - return nil - } - - var deletedConfigs []string - cli.DeleteConfigurationFunc = func(_ context.Context, name string) error { - deletedConfigs = append(deletedConfigs, name) - return nil - } - - err := ConfigSync(nil, cli, "./testdata", false) - require.NoError(t, err) - - expectUpdated := []string{ - "agent-1", - "agent-2", - "agent-3", - } - require.Equal(t, expectUpdated, putConfigs) - - expectDeleted := []string{ - "delete-a", - "delete-b", - "delete-c", - } - require.Equal(t, expectDeleted, deletedConfigs) -} - -func TestConfigSync_DryRun(t *testing.T) { - cli := &mockFuncPromClient{} - cli.ListConfigsFunc = func(_ context.Context) (*configapi.ListConfigurationsResponse, error) { - return &configapi.ListConfigurationsResponse{ - Configs: []string{"delete-a", "agent-1", "delete-b", "delete-c"}, - }, nil - } - - cli.PutConfigurationFunc = func(_ context.Context, name string, _ *instance.Config) error { - t.FailNow() - return nil - } - - cli.DeleteConfigurationFunc = func(_ context.Context, name string) error { - t.FailNow() - return nil - } - - err := ConfigSync(nil, cli, "./testdata", true) - require.NoError(t, err) -} - -type mockFuncPromClient struct { - InstancesFunc func(ctx context.Context) ([]string, error) - ListConfigsFunc func(ctx context.Context) (*configapi.ListConfigurationsResponse, error) - GetConfigurationFunc func(ctx context.Context, name string) (*instance.Config, error) - PutConfigurationFunc func(ctx context.Context, name string, cfg *instance.Config) error - DeleteConfigurationFunc func(ctx context.Context, name string) error -} - -func (m mockFuncPromClient) Instances(ctx context.Context) ([]string, error) { - if m.InstancesFunc != nil { - return m.InstancesFunc(ctx) - } - return nil, errors.New("not implemented") -} - -func (m mockFuncPromClient) ListConfigs(ctx context.Context) (*configapi.ListConfigurationsResponse, error) { - if m.ListConfigsFunc != nil { - return m.ListConfigsFunc(ctx) - } - return nil, errors.New("not implemented") -} - -func (m mockFuncPromClient) GetConfiguration(ctx context.Context, name string) (*instance.Config, error) { - if m.GetConfigurationFunc != nil { - return m.GetConfigurationFunc(ctx, name) - } - return nil, errors.New("not implemented") -} - -func (m mockFuncPromClient) PutConfiguration(ctx context.Context, name string, cfg *instance.Config) error { - if m.PutConfigurationFunc != nil { - return m.PutConfigurationFunc(ctx, name, cfg) - } - return errors.New("not implemented") -} - -func (m mockFuncPromClient) DeleteConfiguration(ctx context.Context, name string) error { - if m.DeleteConfigurationFunc != nil { - return m.DeleteConfigurationFunc(ctx, name) - } - return errors.New("not implemented") -} diff --git a/internal/static/agentctl/testdata/agent-1.yaml b/internal/static/agentctl/testdata/agent-1.yaml deleted file mode 100644 index d62ce80496..0000000000 --- 
a/internal/static/agentctl/testdata/agent-1.yaml +++ /dev/null @@ -1,12 +0,0 @@ -host_filter: false -write_stale_on_shutdown: true -scrape_configs: - - job_name: agent-1 - static_configs: - - targets: ['agent-1:12345'] - labels: - cluster: 'testdata' - origin: 'agent' - container: 'agent-1' -remote_write: - - url: http://cortex:9009/api/prom/push diff --git a/internal/static/agentctl/testdata/agent-2.yaml b/internal/static/agentctl/testdata/agent-2.yaml deleted file mode 100644 index d38252fb52..0000000000 --- a/internal/static/agentctl/testdata/agent-2.yaml +++ /dev/null @@ -1,12 +0,0 @@ -host_filter: false -write_stale_on_shutdown: true -scrape_configs: - - job_name: agent-2 - static_configs: - - targets: ['agent-2:12345'] - labels: - cluster: 'testdata' - origin: 'agent' - container: 'agent-2' -remote_write: - - url: http://cortex:9009/api/prom/push diff --git a/internal/static/agentctl/testdata/agent-3.yaml b/internal/static/agentctl/testdata/agent-3.yaml deleted file mode 100644 index 9312e87078..0000000000 --- a/internal/static/agentctl/testdata/agent-3.yaml +++ /dev/null @@ -1,12 +0,0 @@ -host_filter: false -write_stale_on_shutdown: true -scrape_configs: - - job_name: agent-3 - static_configs: - - targets: ['agent-3:12345'] - labels: - cluster: 'testdata' - origin: 'agent' - container: 'agent-3' -remote_write: - - url: http://cortex:9009/api/prom/push diff --git a/internal/static/agentproto/agent.pb.go b/internal/static/agentproto/agent.pb.go deleted file mode 100644 index 2f5c8bcfff..0000000000 --- a/internal/static/agentproto/agent.pb.go +++ /dev/null @@ -1,416 +0,0 @@ -// Code generated by protoc-gen-gogo. DO NOT EDIT. -// source: agent.proto - -package agentproto - -import ( - context "context" - fmt "fmt" - proto "github.com/gogo/protobuf/proto" - grpc "google.golang.org/grpc" - codes "google.golang.org/grpc/codes" - status "google.golang.org/grpc/status" - emptypb "google.golang.org/protobuf/types/known/emptypb" - io "io" - math "math" - math_bits "math/bits" - reflect "reflect" - strings "strings" -) - -// Reference imports to suppress errors if they are not otherwise used. -var _ = proto.Marshal -var _ = fmt.Errorf -var _ = math.Inf - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the proto package it is being compiled against. -// A compilation error at this line likely means your copy of the -// proto package needs to be updated. 
-const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package - -type ReshardRequest struct { -} - -func (m *ReshardRequest) Reset() { *m = ReshardRequest{} } -func (*ReshardRequest) ProtoMessage() {} -func (*ReshardRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_56ede974c0020f77, []int{0} -} -func (m *ReshardRequest) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *ReshardRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_ReshardRequest.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *ReshardRequest) XXX_Merge(src proto.Message) { - xxx_messageInfo_ReshardRequest.Merge(m, src) -} -func (m *ReshardRequest) XXX_Size() int { - return m.Size() -} -func (m *ReshardRequest) XXX_DiscardUnknown() { - xxx_messageInfo_ReshardRequest.DiscardUnknown(m) -} - -var xxx_messageInfo_ReshardRequest proto.InternalMessageInfo - -func init() { - proto.RegisterType((*ReshardRequest)(nil), "agentproto.ReshardRequest") -} - -func init() { proto.RegisterFile("agent.proto", fileDescriptor_56ede974c0020f77) } - -var fileDescriptor_56ede974c0020f77 = []byte{ - // 220 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x4e, 0x4c, 0x4f, 0xcd, - 0x2b, 0xd1, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0xe2, 0x02, 0x73, 0xc0, 0x6c, 0x29, 0xe9, 0xf4, - 0xfc, 0xfc, 0xf4, 0x9c, 0x54, 0x7d, 0x30, 0x2f, 0xa9, 0x34, 0x4d, 0x3f, 0x35, 0xb7, 0xa0, 0xa4, - 0x12, 0xa2, 0x50, 0x49, 0x80, 0x8b, 0x2f, 0x28, 0xb5, 0x38, 0x23, 0xb1, 0x28, 0x25, 0x28, 0xb5, - 0xb0, 0x34, 0xb5, 0xb8, 0xc4, 0x28, 0x80, 0x8b, 0x3f, 0x38, 0xb9, 0x28, 0xb1, 0x20, 0x33, 0x2f, - 0x3d, 0x38, 0xb5, 0xa8, 0x2c, 0x33, 0x39, 0x55, 0xc8, 0x96, 0x8b, 0x1d, 0xaa, 0x48, 0x48, 0x4a, - 0x0f, 0x61, 0xb2, 0x1e, 0xaa, 0x4e, 0x29, 0x31, 0x3d, 0x88, 0x4d, 0x7a, 0x30, 0x9b, 0xf4, 0x5c, - 0x41, 0x36, 0x39, 0xc5, 0x5e, 0x78, 0x28, 0xc7, 0x70, 0xe3, 0xa1, 0x1c, 0xc3, 0x87, 0x87, 0x72, - 0x8c, 0x0d, 0x8f, 0xe4, 0x18, 0x57, 0x3c, 0x92, 0x63, 0x3c, 0xf1, 0x48, 0x8e, 0xf1, 0xc2, 0x23, - 0x39, 0xc6, 0x07, 0x8f, 0xe4, 0x18, 0x5f, 0x3c, 0x92, 0x63, 0xf8, 0xf0, 0x48, 0x8e, 0x71, 0xc2, - 0x63, 0x39, 0x86, 0x0b, 0x8f, 0xe5, 0x18, 0x6e, 0x3c, 0x96, 0x63, 0x88, 0x52, 0x4f, 0xcf, 0x2c, - 0xc9, 0x28, 0x4d, 0xd2, 0x4b, 0xce, 0xcf, 0xd5, 0x4f, 0x2f, 0x4a, 0x4c, 0x4b, 0xcc, 0x4b, 0xd4, - 0x07, 0xdb, 0xad, 0x5f, 0x90, 0x9d, 0xae, 0x8f, 0x70, 0x45, 0x12, 0x1b, 0x98, 0x32, 0x06, 0x04, - 0x00, 0x00, 0xff, 0xff, 0x15, 0xe9, 0x8a, 0xfc, 0x01, 0x01, 0x00, 0x00, -} - -func (this *ReshardRequest) Equal(that interface{}) bool { - if that == nil { - return this == nil - } - - that1, ok := that.(*ReshardRequest) - if !ok { - that2, ok := that.(ReshardRequest) - if ok { - that1 = &that2 - } else { - return false - } - } - if that1 == nil { - return this == nil - } else if this == nil { - return false - } - return true -} -func (this *ReshardRequest) GoString() string { - if this == nil { - return "nil" - } - s := make([]string, 0, 4) - s = append(s, "&agentproto.ReshardRequest{") - s = append(s, "}") - return strings.Join(s, "") -} -func valueToGoStringAgent(v interface{}, typ string) string { - rv := reflect.ValueOf(v) - if rv.IsNil() { - return "nil" - } - pv := reflect.Indirect(rv).Interface() - return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv) -} - -// Reference imports to suppress errors if 
they are not otherwise used. -var _ context.Context -var _ grpc.ClientConn - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the grpc package it is being compiled against. -const _ = grpc.SupportPackageIsVersion4 - -// ScrapingServiceClient is the client API for ScrapingService service. -// -// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. -type ScrapingServiceClient interface { - // Reshard tells the implementing service to reshard all of its running - // configs. - Reshard(ctx context.Context, in *ReshardRequest, opts ...grpc.CallOption) (*emptypb.Empty, error) -} - -type scrapingServiceClient struct { - cc *grpc.ClientConn -} - -func NewScrapingServiceClient(cc *grpc.ClientConn) ScrapingServiceClient { - return &scrapingServiceClient{cc} -} - -func (c *scrapingServiceClient) Reshard(ctx context.Context, in *ReshardRequest, opts ...grpc.CallOption) (*emptypb.Empty, error) { - out := new(emptypb.Empty) - err := c.cc.Invoke(ctx, "/agentproto.ScrapingService/Reshard", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -// ScrapingServiceServer is the server API for ScrapingService service. -type ScrapingServiceServer interface { - // Reshard tells the implementing service to reshard all of its running - // configs. - Reshard(context.Context, *ReshardRequest) (*emptypb.Empty, error) -} - -// UnimplementedScrapingServiceServer can be embedded to have forward compatible implementations. -type UnimplementedScrapingServiceServer struct { -} - -func (*UnimplementedScrapingServiceServer) Reshard(ctx context.Context, req *ReshardRequest) (*emptypb.Empty, error) { - return nil, status.Errorf(codes.Unimplemented, "method Reshard not implemented") -} - -func RegisterScrapingServiceServer(s *grpc.Server, srv ScrapingServiceServer) { - s.RegisterService(&_ScrapingService_serviceDesc, srv) -} - -func _ScrapingService_Reshard_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(ReshardRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(ScrapingServiceServer).Reshard(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/agentproto.ScrapingService/Reshard", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(ScrapingServiceServer).Reshard(ctx, req.(*ReshardRequest)) - } - return interceptor(ctx, in, info, handler) -} - -var _ScrapingService_serviceDesc = grpc.ServiceDesc{ - ServiceName: "agentproto.ScrapingService", - HandlerType: (*ScrapingServiceServer)(nil), - Methods: []grpc.MethodDesc{ - { - MethodName: "Reshard", - Handler: _ScrapingService_Reshard_Handler, - }, - }, - Streams: []grpc.StreamDesc{}, - Metadata: "agent.proto", -} - -func (m *ReshardRequest) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *ReshardRequest) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *ReshardRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - return len(dAtA) - i, nil -} - -func encodeVarintAgent(dAtA []byte, offset int, v uint64) int { - offset -= 
sovAgent(v) - base := offset - for v >= 1<<7 { - dAtA[offset] = uint8(v&0x7f | 0x80) - v >>= 7 - offset++ - } - dAtA[offset] = uint8(v) - return base -} -func (m *ReshardRequest) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - return n -} - -func sovAgent(x uint64) (n int) { - return (math_bits.Len64(x|1) + 6) / 7 -} -func sozAgent(x uint64) (n int) { - return sovAgent(uint64((x << 1) ^ uint64((int64(x) >> 63)))) -} -func (this *ReshardRequest) String() string { - if this == nil { - return "nil" - } - s := strings.Join([]string{`&ReshardRequest{`, - `}`, - }, "") - return s -} -func valueToStringAgent(v interface{}) string { - rv := reflect.ValueOf(v) - if rv.IsNil() { - return "nil" - } - pv := reflect.Indirect(rv).Interface() - return fmt.Sprintf("*%v", pv) -} -func (m *ReshardRequest) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowAgent - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: ReshardRequest: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: ReshardRequest: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - default: - iNdEx = preIndex - skippy, err := skipAgent(dAtA[iNdEx:]) - if err != nil { - return err - } - if skippy < 0 { - return ErrInvalidLengthAgent - } - if (iNdEx + skippy) < 0 { - return ErrInvalidLengthAgent - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func skipAgent(dAtA []byte) (n int, err error) { - l := len(dAtA) - iNdEx := 0 - depth := 0 - for iNdEx < l { - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowAgent - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - wireType := int(wire & 0x7) - switch wireType { - case 0: - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowAgent - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - iNdEx++ - if dAtA[iNdEx-1] < 0x80 { - break - } - } - case 1: - iNdEx += 8 - case 2: - var length int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowAgent - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - length |= (int(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - if length < 0 { - return 0, ErrInvalidLengthAgent - } - iNdEx += length - case 3: - depth++ - case 4: - if depth == 0 { - return 0, ErrUnexpectedEndOfGroupAgent - } - depth-- - case 5: - iNdEx += 4 - default: - return 0, fmt.Errorf("proto: illegal wireType %d", wireType) - } - if iNdEx < 0 { - return 0, ErrInvalidLengthAgent - } - if depth == 0 { - return iNdEx, nil - } - } - return 0, io.ErrUnexpectedEOF -} - -var ( - ErrInvalidLengthAgent = fmt.Errorf("proto: negative length found during unmarshaling") - ErrIntOverflowAgent = fmt.Errorf("proto: integer overflow") - ErrUnexpectedEndOfGroupAgent = fmt.Errorf("proto: unexpected end of group") -) diff --git a/internal/static/agentproto/agent.proto b/internal/static/agentproto/agent.proto deleted file mode 
100644 index 405f7779c4..0000000000 --- a/internal/static/agentproto/agent.proto +++ /dev/null @@ -1,20 +0,0 @@ -syntax = "proto3"; - -package agentproto; -option go_package = "github.com/grafana/agent/internal/static/agentproto"; - -import "google/protobuf/empty.proto"; - -// ScrapingService holds methods that can be called against a Prometheus -// Scraping Service instance. -// -// These methods are only available when the agent config file has enabled the -// scraping service mode. If the scraping service mode is not enabling, -// invoking any of the RPCs here will return a not found error. -service ScrapingService { - // Reshard tells the implementing service to reshard all of its running - // configs. - rpc Reshard(ReshardRequest) returns (google.protobuf.Empty); -} - -message ReshardRequest {} diff --git a/internal/static/agentproto/func.go b/internal/static/agentproto/func.go deleted file mode 100644 index 64bf9cc204..0000000000 --- a/internal/static/agentproto/func.go +++ /dev/null @@ -1,21 +0,0 @@ -package agentproto - -import ( - "context" - - empty "github.com/golang/protobuf/ptypes/empty" -) - -// FuncScrapingServiceServer is an implementation of ScrapingServiceServer that -// uses function fields to implement the interface. Useful for tests. -type FuncScrapingServiceServer struct { - ReshardFunc func(context.Context, *ReshardRequest) (*empty.Empty, error) -} - -// Reshard implements ScrapingServiceServer. -func (f *FuncScrapingServiceServer) Reshard(ctx context.Context, req *ReshardRequest) (*empty.Empty, error) { - if f.ReshardFunc != nil { - return f.ReshardFunc(ctx, req) - } - panic("ReshardFunc is nil") -} diff --git a/internal/static/agentproto/gen.go b/internal/static/agentproto/gen.go deleted file mode 100644 index 591102b899..0000000000 --- a/internal/static/agentproto/gen.go +++ /dev/null @@ -1,3 +0,0 @@ -package agentproto - -//go:generate protoc --gogoslick_out=Mgoogle/protobuf/timestamp.proto=github.com/gogo/protobuf/types,plugins=grpc,paths=source_relative:./ ./agent.proto diff --git a/internal/static/client/client.go b/internal/static/client/client.go deleted file mode 100644 index 68048cbce3..0000000000 --- a/internal/static/client/client.go +++ /dev/null @@ -1,179 +0,0 @@ -// Package client provides a client interface to the Agent HTTP -// API. -package client - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "strings" - - "github.com/grafana/agent/internal/static/metrics/cluster/configapi" - "github.com/grafana/agent/internal/static/metrics/instance" - "gopkg.in/yaml.v2" -) - -// Client is a collection of all subsystem clients. -type Client struct { - PrometheusClient -} - -// New creates a new Client. -func New(addr string) *Client { - return &Client{ - PrometheusClient: &prometheusClient{addr: addr}, - } -} - -// PrometheusClient is the client interface to the API exposed by the -// Prometheus subsystem of the Grafana Agent. -type PrometheusClient interface { - // Instances runs the list of currently running instances. - Instances(ctx context.Context) ([]string, error) - - // The following methods are for the scraping service mode - // only and will fail when not enabled on the Agent. - - // ListConfigs runs the list of instance configs stored in the config - // management KV store. - ListConfigs(ctx context.Context) (*configapi.ListConfigurationsResponse, error) - - // GetConfiguration returns a named configuration from the config - // management KV store. 
- GetConfiguration(ctx context.Context, name string) (*instance.Config, error) - - // PutConfiguration adds or updates a named configuration into the - // config management KV store. - PutConfiguration(ctx context.Context, name string, cfg *instance.Config) error - - // DeleteConfiguration removes a named configuration from the config - // management KV store. - DeleteConfiguration(ctx context.Context, name string) error -} - -type prometheusClient struct { - addr string -} - -func (c *prometheusClient) Instances(ctx context.Context) ([]string, error) { - url := fmt.Sprintf("%s/agent/api/v1/metrics/instances", c.addr) - - resp, err := c.doRequest(ctx, "GET", url, nil) - if err != nil { - return nil, err - } - - var data []string - err = unmarshalPrometheusAPIResponse(resp.Body, &data) - return data, err -} - -func (c *prometheusClient) ListConfigs(ctx context.Context) (*configapi.ListConfigurationsResponse, error) { - url := fmt.Sprintf("%s/agent/api/v1/configs", c.addr) - - resp, err := c.doRequest(ctx, "GET", url, nil) - if err != nil { - return nil, err - } - - var data configapi.ListConfigurationsResponse - err = unmarshalPrometheusAPIResponse(resp.Body, &data) - return &data, err -} - -func (c *prometheusClient) GetConfiguration(ctx context.Context, name string) (*instance.Config, error) { - url := fmt.Sprintf("%s/agent/api/v1/configs/%s", c.addr, name) - - resp, err := c.doRequest(ctx, "GET", url, nil) - if err != nil { - return nil, err - } - - var data configapi.GetConfigurationResponse - if err := unmarshalPrometheusAPIResponse(resp.Body, &data); err != nil { - return nil, err - } - - var config instance.Config - err = yaml.NewDecoder(strings.NewReader(data.Value)).Decode(&config) - return &config, err -} - -func (c *prometheusClient) PutConfiguration(ctx context.Context, name string, cfg *instance.Config) error { - url := fmt.Sprintf("%s/agent/api/v1/config/%s", c.addr, name) - - bb, err := instance.MarshalConfig(cfg, false) - if err != nil { - return err - } - - resp, err := c.doRequest(ctx, "POST", url, bytes.NewReader(bb)) - if err != nil { - return err - } - - return unmarshalPrometheusAPIResponse(resp.Body, nil) -} - -func (c *prometheusClient) DeleteConfiguration(ctx context.Context, name string) error { - url := fmt.Sprintf("%s/agent/api/v1/config/%s", c.addr, name) - - resp, err := c.doRequest(ctx, "DELETE", url, nil) - if err != nil { - return err - } - - return unmarshalPrometheusAPIResponse(resp.Body, nil) -} - -func (c *prometheusClient) doRequest(ctx context.Context, method string, url string, body io.Reader) (*http.Response, error) { - req, err := http.NewRequestWithContext(ctx, method, url, body) - if err != nil { - return nil, err - } - return http.DefaultClient.Do(req) -} - -// unmarshalPrometheusAPIResponse will unmarshal a response from the Prometheus -// subsystem API. -// -// r will be closed after this method is called. 
-func unmarshalPrometheusAPIResponse(r io.ReadCloser, v interface{}) error { - defer func() { - _ = r.Close() - }() - - resp := struct { - Status string `json:"status"` - Data json.RawMessage `json:"data"` - }{} - - err := json.NewDecoder(r).Decode(&resp) - if err != nil { - return fmt.Errorf("could not read response: %w", err) - } - - if v != nil && resp.Status == "success" { - err := json.Unmarshal(resp.Data, v) - if err != nil { - return fmt.Errorf("unmarshaling response: %w", err) - } - } else if resp.Status == "error" { - var errResp configapi.ErrorResponse - err := json.Unmarshal(resp.Data, &errResp) - if err != nil { - return fmt.Errorf("unmarshaling error: %w", err) - } - - return fmt.Errorf("%s", errResp.Error) - } - - if resp.Status != "success" && resp.Status != "error" { - return fmt.Errorf("unknown API response status: %s", resp.Status) - } - - return nil -} diff --git a/internal/static/config/agent_management_remote_config_test.go b/internal/static/config/agent_management_remote_config_test.go deleted file mode 100644 index 820801cf70..0000000000 --- a/internal/static/config/agent_management_remote_config_test.go +++ /dev/null @@ -1,364 +0,0 @@ -package config - -import ( - "testing" - "time" - - process_exporter "github.com/grafana/agent/internal/static/integrations/process_exporter" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/labels" - "github.com/stretchr/testify/require" -) - -func TestBuildRemoteConfig(t *testing.T) { - baseConfig := ` -server: - log_level: debug -` - metricsSnippets := []Snippet{{ - Config: ` -metrics_scrape_configs: - - job_name: 'prometheus' - scrape_interval: 15s - static_configs: - - targets: ['localhost:9090'] -`, - }} - logsSnippets := []Snippet{{ - Config: ` -logs_scrape_configs: - - job_name: 'loki' - static_configs: - - targets: ['localhost:3100'] -`, - }} - integrationSnippets := []Snippet{{ - Config: ` -integration_configs: - agent: - enabled: true - relabel_configs: - - action: replace - source_labels: - - agent_hostname - target_label: instance -`, - }} - - allSnippets := []Snippet{} - allSnippets = append(allSnippets, metricsSnippets...) - allSnippets = append(allSnippets, logsSnippets...) - allSnippets = append(allSnippets, integrationSnippets...) 
- - t.Run("only metrics snippets provided", func(t *testing.T) { - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: metricsSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, len(c.Metrics.Configs), 1) - require.Empty(t, c.Logs) - require.Empty(t, c.Integrations.ConfigV1.Integrations) - }) - - t.Run("only log snippets provided", func(t *testing.T) { - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: logsSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, len(c.Logs.Configs), 1) - require.Empty(t, c.Metrics.Configs) - require.Empty(t, c.Integrations.ConfigV1.Integrations) - }) - - t.Run("only integration snippets provided", func(t *testing.T) { - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: integrationSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Empty(t, c.Metrics.Configs) - require.Empty(t, c.Logs) - require.Equal(t, 1, len(c.Integrations.ConfigV1.Integrations)) - }) - - t.Run("base with already logs, metrics and integrations provided", func(t *testing.T) { - fullConfig := ` -metrics: - configs: - - name: default - scrape_configs: - - job_name: default-prom - static_configs: - - targets: - - localhost:9090 -logs: - positions_directory: /tmp/grafana-agent-positions - configs: - - name: default - scrape_configs: - - job_name: default-loki - static_configs: - - targets: - - localhost:3100 -integrations: - node_exporter: - enabled: true -` - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(fullConfig), - Snippets: allSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, len(c.Logs.Configs), 2) - require.Equal(t, len(c.Metrics.Configs), 2) - require.Equal(t, 2, len(c.Integrations.ConfigV1.Integrations)) - }) - - t.Run("do not override integrations defined in base config with the ones defined in snippets", func(t *testing.T) { - baseConfig := ` -integrations: - node_exporter: - enabled: false -` - - snippets := []Snippet{{ - Config: ` -integration_configs: - node_exporter: - enabled: true`, - }} - - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: snippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Integrations.ConfigV1.Integrations)) - require.False(t, c.Integrations.ConfigV1.Integrations[0].Common.Enabled) - }) - - t.Run("all snippets provided", func(t *testing.T) { - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: allSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Logs.Configs)) - require.Equal(t, 1, len(c.Metrics.Configs)) - require.Equal(t, 1, len(c.Integrations.ConfigV1.Integrations)) - - // check some fields to make sure the config was parsed correctly - require.Equal(t, "prometheus", c.Metrics.Configs[0].ScrapeConfigs[0].JobName) - require.Equal(t, "loki", c.Logs.Configs[0].ScrapeConfig[0].JobName) - require.Equal(t, "agent", c.Integrations.ConfigV1.Integrations[0].Name()) - - // make sure defaults for metric snippets are applied - require.Equal(t, instance.DefaultConfig.WALTruncateFrequency, c.Metrics.Configs[0].WALTruncateFrequency) - require.Equal(t, instance.DefaultConfig.HostFilter, c.Metrics.Configs[0].HostFilter) - require.Equal(t, instance.DefaultConfig.MinWALTime, c.Metrics.Configs[0].MinWALTime) - require.Equal(t, instance.DefaultConfig.MaxWALTime, 
c.Metrics.Configs[0].MaxWALTime) - require.Equal(t, instance.DefaultConfig.RemoteFlushDeadline, c.Metrics.Configs[0].RemoteFlushDeadline) - require.Equal(t, instance.DefaultConfig.WriteStaleOnShutdown, c.Metrics.Configs[0].WriteStaleOnShutdown) - require.Equal(t, instance.DefaultGlobalConfig, c.Metrics.Global) - - // make sure defaults for log snippets are applied - require.Equal(t, 10*time.Second, c.Logs.Configs[0].PositionsConfig.SyncPeriod) - require.Equal(t, "", c.Logs.Configs[0].PositionsConfig.PositionsFile) - require.Equal(t, false, c.Logs.Configs[0].PositionsConfig.IgnoreInvalidYaml) - require.Equal(t, false, c.Logs.Configs[0].TargetConfig.Stdin) - - // make sure defaults for integration snippets are applied - require.Equal(t, true, c.Integrations.ConfigV1.ScrapeIntegrations) - require.Equal(t, true, c.Integrations.ConfigV1.UseHostnameLabel) - require.Equal(t, true, c.Integrations.ConfigV1.ReplaceInstanceLabel) - require.Equal(t, 5*time.Second, c.Integrations.ConfigV1.IntegrationRestartBackoff) - }) - - t.Run("template variables provided", func(t *testing.T) { - baseConfig := ` -server: - log_level: {{.log_level}} -` - templateInsideTemplate := "`{{ .template_inside_template }}`" - snippet := Snippet{ - Config: ` -integration_configs: - process_exporter: - enabled: true - process_names: - - name: "grafana-agent" - cmdline: - - 'grafana-agent' - - name: "{{.nonexistent.foo.bar.baz.bat}}" - cmdline: - - "{{ ` + templateInsideTemplate + ` }}" - # Custom process monitors - {{- range $key, $value := .process_exporter_processes }} - - name: "{{ $value.name }}" - cmdline: - - "{{ $value.cmdline }}" - {{if $value.exe}} - exe: - - "{{ $value.exe }}" - {{end}} - {{- end }} -`, - } - - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: []Snippet{snippet}, - AgentMetadata: AgentMetadata{ - TemplateVariables: map[string]any{ - "log_level": "debug", - "process_exporter_processes": []map[string]string{ - { - "name": "java_processes", - "cmdline": ".*/java", - }, - { - "name": "{{.ExeFull}}:{{.Matches.Cfgfile}}", - "cmdline": `-config.path\\s+(?P\\S+)`, - "exe": "/usr/local/bin/process-exporter", - }, - }, - }, - }, - } - - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Integrations.ConfigV1.Integrations)) - processExporterConfig := c.Integrations.ConfigV1.Integrations[0].Config.(*process_exporter.Config) - - require.Equal(t, 4, len(processExporterConfig.ProcessExporter)) - - require.Equal(t, "grafana-agent", processExporterConfig.ProcessExporter[0].Name) - require.Equal(t, "grafana-agent", processExporterConfig.ProcessExporter[0].CmdlineRules[0]) - require.Equal(t, 0, len(processExporterConfig.ProcessExporter[0].ExeRules)) - - require.Equal(t, "", processExporterConfig.ProcessExporter[1].Name) - require.Equal(t, "{{ .template_inside_template }}", processExporterConfig.ProcessExporter[1].CmdlineRules[0]) - require.Equal(t, 0, len(processExporterConfig.ProcessExporter[1].ExeRules)) - - require.Equal(t, "java_processes", processExporterConfig.ProcessExporter[2].Name) - require.Equal(t, ".*/java", processExporterConfig.ProcessExporter[2].CmdlineRules[0]) - require.Equal(t, 0, len(processExporterConfig.ProcessExporter[2].ExeRules)) - - require.Equal(t, "{{.ExeFull}}:{{.Matches.Cfgfile}}", processExporterConfig.ProcessExporter[3].Name) - require.Equal(t, `-config.path\s+(?P\S+)`, processExporterConfig.ProcessExporter[3].CmdlineRules[0]) - require.Equal(t, "/usr/local/bin/process-exporter", 
processExporterConfig.ProcessExporter[3].ExeRules[0]) - }) - - t.Run("no external labels provided", func(t *testing.T) { - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: allSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Logs.Configs)) - require.Empty(t, c.Metrics.Global.Prometheus.ExternalLabels) - }) - - t.Run("no external labels provided in remote config", func(t *testing.T) { - baseConfig := ` -server: - log_level: debug -metrics: - global: - external_labels: - foo: bar -logs: - global: - clients: - - external_labels: - foo: bar -` - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: allSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Logs.Configs)) - require.Equal(t, 1, len(c.Logs.Global.ClientConfigs)) - require.Equal(t, c.Logs.Global.ClientConfigs[0].ExternalLabels.LabelSet, model.LabelSet{"foo": "bar"}) - require.Equal(t, 1, len(c.Metrics.Global.Prometheus.ExternalLabels)) - require.Contains(t, c.Metrics.Global.Prometheus.ExternalLabels, labels.Label{Name: "foo", Value: "bar"}) - }) - - t.Run("external labels provided", func(t *testing.T) { - baseConfig := ` -server: - log_level: debug -metrics: - global: - remote_write: - - url: http://localhost:9090/api/prom/push -logs: - global: - clients: - - url: http://localhost:3100/loki/api/v1/push -` - - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: allSnippets, - AgentMetadata: AgentMetadata{ - ExternalLabels: map[string]string{ - "foo": "bar", - }, - }, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Logs.Configs)) - require.Equal(t, 1, len(c.Metrics.Configs)) - require.Equal(t, 1, len(c.Logs.Global.ClientConfigs)) - require.Equal(t, c.Logs.Global.ClientConfigs[0].ExternalLabels.LabelSet, model.LabelSet{"foo": "bar"}) - require.Contains(t, c.Metrics.Global.Prometheus.ExternalLabels, labels.Label{Name: "foo", Value: "bar"}) - }) - - t.Run("external labels don't override base config", func(t *testing.T) { - baseConfig := ` -server: - log_level: debug -metrics: - global: - external_labels: - foo: bar -logs: - global: - clients: - - external_labels: - foo: bar -` - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: allSnippets, - AgentMetadata: AgentMetadata{ - ExternalLabels: map[string]string{ - "foo": "baz", - }, - }, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Logs.Configs)) - require.Equal(t, 1, len(c.Metrics.Configs)) - require.Equal(t, 1, len(c.Logs.Global.ClientConfigs)) - require.Equal(t, c.Logs.Global.ClientConfigs[0].ExternalLabels.LabelSet, model.LabelSet{"foo": "bar"}) - require.Contains(t, c.Metrics.Global.Prometheus.ExternalLabels, labels.Label{Name: "foo", Value: "bar"}) - require.NotContains(t, c.Metrics.Global.Prometheus.ExternalLabels, labels.Label{Name: "foo", Value: "baz"}) - }) -} diff --git a/internal/static/config/agentmanagement.go b/internal/static/config/agentmanagement.go index 9f91ba21a7..7b9c686d8a 100644 --- a/internal/static/config/agentmanagement.go +++ b/internal/static/config/agentmanagement.go @@ -1,202 +1,19 @@ package config import ( - "crypto/sha256" - "encoding/hex" - "encoding/json" "errors" - "flag" "fmt" - "math/rand" - "net/url" - "os" - "path/filepath" "time" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/config/instrumentation" - 
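[The deleted tests above drive snippet rendering through Go's text/template. A minimal standalone sketch of that mechanism follows; the variable name log_level mirrors the fixtures above, everything else is invented for illustration and is not code from this patch.]

package main

import (
	"bytes"
	"fmt"
	"text/template"
)

func main() {
	// Parse a config fragment containing template actions, then execute it
	// against a map of variables, the same pattern the deleted snippet
	// tests exercised via evaluateTemplate.
	tpl, err := template.New("config").Parse("server:\n  log_level: {{.log_level}}\n")
	if err != nil {
		panic(err)
	}
	var buf bytes.Buffer
	if err := tpl.Execute(&buf, map[string]any{"log_level": "debug"}); err != nil {
		panic(err)
	}
	fmt.Print(buf.String()) // prints: server:\n  log_level: debug
}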
"github.com/grafana/agent/internal/static/server" "github.com/prometheus/common/config" - "gopkg.in/yaml.v2" -) - -const ( - cacheFilename = "remote-config-cache.yaml" - apiPath = "/agent-management/api/agent/v2" - labelManagementEnabledHeader = "X-LabelManagementEnabled" - agentIDHeader = "X-AgentID" - agentNamespaceVersionHeader = "X-AgentNamespaceVersion" - agentInfoVersionHeader = "X-AgentInfoVersion" - acceptNotModifiedHeader = "X-AcceptHTTPNotModified" ) var ( - agentInfoVersion string - agentNamespaceVersion string defaultRemoteConfiguration = RemoteConfiguration{ AcceptHTTPNotModified: true, } ) -type remoteConfigProvider interface { - GetCachedRemoteConfig() ([]byte, error) - CacheRemoteConfig(remoteConfigBytes []byte) error - FetchRemoteConfig() ([]byte, error) - GetPollingInterval() time.Duration -} - -type remoteConfigHTTPProvider struct { - InitialConfig *AgentManagementConfig -} - -func newRemoteConfigHTTPProvider(c *Config) (*remoteConfigHTTPProvider, error) { - err := c.AgentManagement.Validate() - if err != nil { - return nil, err - } - return &remoteConfigHTTPProvider{ - InitialConfig: &c.AgentManagement, - }, nil -} - -type remoteConfigCache struct { - InitialConfigHash string `json:"initial_config_hash"` - Config string `json:"config"` -} - -func hashInitialConfig(am AgentManagementConfig) (string, error) { - marshalled, err := yaml.Marshal(am) - if err != nil { - return "", fmt.Errorf("could not marshal initial config: %w", err) - } - hashed := sha256.Sum256(marshalled) - return hex.EncodeToString(hashed[:]), nil -} - -// initialConfigHashCheck checks if the hash of initialConfig matches what is stored in configCache.InitialConfigHash. -// If an error is encountered while hashing initialConfig or the hashes do not match, initialConfigHashCheck -// returns an error. Otherwise, it returns nil. 
-func initialConfigHashCheck(initialConfig AgentManagementConfig, configCache remoteConfigCache) error { - initialConfigHash, err := hashInitialConfig(initialConfig) - if err != nil { - return err - } - - if !(configCache.InitialConfigHash == initialConfigHash) { - return errors.New("invalid remote config cache: initial config hashes don't match") - } - return nil -} - -// GetCachedRemoteConfig retrieves the cached remote config from the location specified -// in r.AgentManagement.CacheLocation -func (r remoteConfigHTTPProvider) GetCachedRemoteConfig() ([]byte, error) { - cachePath := filepath.Join(r.InitialConfig.RemoteConfiguration.CacheLocation, cacheFilename) - - var configCache remoteConfigCache - buf, err := os.ReadFile(cachePath) - - if err != nil { - return nil, fmt.Errorf("error reading remote config cache: %w", err) - } - - if err := json.Unmarshal(buf, &configCache); err != nil { - return nil, fmt.Errorf("error trying to load cached remote config from file: %w", err) - } - - if err = initialConfigHashCheck(*r.InitialConfig, configCache); err != nil { - return nil, err - } - - return []byte(configCache.Config), nil -} - -// CacheRemoteConfig caches the remote config to the location specified in -// r.AgentManagement.CacheLocation -func (r remoteConfigHTTPProvider) CacheRemoteConfig(remoteConfigBytes []byte) error { - cachePath := filepath.Join(r.InitialConfig.RemoteConfiguration.CacheLocation, cacheFilename) - initialConfigHash, err := hashInitialConfig(*r.InitialConfig) - if err != nil { - return err - } - configCache := remoteConfigCache{ - InitialConfigHash: initialConfigHash, - Config: string(remoteConfigBytes), - } - marshalled, err := json.Marshal(configCache) - if err != nil { - return fmt.Errorf("could not marshal remote config cache: %w", err) - } - return os.WriteFile(cachePath, marshalled, 0666) -} - -// FetchRemoteConfig fetches the raw bytes of the config from a remote API using -// the values in r.AgentManagement. -func (r remoteConfigHTTPProvider) FetchRemoteConfig() ([]byte, error) { - httpClientConfig := &r.InitialConfig.HTTPClientConfig - - dir, err := os.Getwd() - if err != nil { - return nil, fmt.Errorf("failed to get current working directory: %w", err) - } - httpClientConfig.SetDirectory(dir) - - remoteOpts := &remoteOpts{ - HTTPClientConfig: httpClientConfig, - } - - if r.InitialConfig.RemoteConfiguration.LabelManagementEnabled && r.InitialConfig.RemoteConfiguration.AgentID != "" { - remoteOpts.headers = map[string]string{ - labelManagementEnabledHeader: "1", - agentIDHeader: r.InitialConfig.RemoteConfiguration.AgentID, - } - - if agentNamespaceVersion != "" { - remoteOpts.headers[agentNamespaceVersionHeader] = agentNamespaceVersion - } - if agentInfoVersion != "" { - remoteOpts.headers[agentInfoVersionHeader] = agentInfoVersion - } - if r.InitialConfig.RemoteConfiguration.AcceptHTTPNotModified { - remoteOpts.headers[acceptNotModifiedHeader] = "1" - } - } - - url, err := r.InitialConfig.fullUrl() - if err != nil { - return nil, fmt.Errorf("error trying to create full url: %w", err) - } - rc, err := newRemoteProvider(url, remoteOpts) - if err != nil { - return nil, fmt.Errorf("error reading remote config: %w", err) - } - - bb, headers, err := rc.retrieve() - - // If the server returns a 304, return it and the caller will handle it. 
- var nme notModifiedError - if errors.Is(err, nme) { - return nil, nme - } - - if err != nil { - return nil, fmt.Errorf("error retrieving remote config: %w", err) - } - - nsVersion := headers.Get(agentNamespaceVersionHeader) - infoVersion := headers.Get(agentInfoVersionHeader) - if nsVersion != "" && infoVersion != "" { - agentNamespaceVersion = nsVersion - agentInfoVersion = infoVersion - } - - return bb, nil -} - -func (r remoteConfigHTTPProvider) GetPollingInterval() time.Duration { - return r.InitialConfig.PollingInterval -} - type labelMap map[string]string type RemoteConfiguration struct { @@ -226,131 +43,6 @@ type AgentManagementConfig struct { RemoteConfiguration RemoteConfiguration `yaml:"remote_configuration"` } -// getRemoteConfig gets the remote config specified in the initial config, falling back to a local, cached copy -// of the remote config if the request to the remote fails. If both fail, an empty config and an -// error will be returned. -func getRemoteConfig(expandEnvVars bool, configProvider remoteConfigProvider, log *server.Logger, fs *flag.FlagSet, retry bool) (*Config, error) { - remoteConfigBytes, err := configProvider.FetchRemoteConfig() - if errors.Is(err, notModifiedError{}) { - level.Info(log).Log("msg", "remote config has not changed since last fetch, using cached copy") - remoteConfigBytes, err = configProvider.GetCachedRemoteConfig() - } - if err != nil { - var retryAfterErr retryAfterError - if errors.As(err, &retryAfterErr) && retry { - // In the case that the server is telling us to retry after a time greater than our polling interval, - // the agent should sleep for the duration of the retry-after header. - // - // If the duration of the retry-after is lower than the polling interval, the agent will simply - // fall back to the cache and continue polling at the polling interval, effectively skipping - // this poll. - if retryAfterErr.retryAfter > configProvider.GetPollingInterval() { - level.Info(log).Log("msg", "received retry-after from API, sleeping and falling back to cache", "retry-after", retryAfterErr.retryAfter) - time.Sleep(retryAfterErr.retryAfter) - } else { - level.Info(log).Log("msg", "received retry-after from API, falling back to cache", "retry-after", retryAfterErr.retryAfter) - } - // Return the cached config, as this is the last known good config and a config must be returned here. - return getCachedRemoteConfig(expandEnvVars, configProvider, fs, log) - } - level.Error(log).Log("msg", "could not fetch from API, falling back to cache", "err", err) - return getCachedRemoteConfig(expandEnvVars, configProvider, fs, log) - } - - config, err := loadRemoteConfig(remoteConfigBytes, expandEnvVars, fs) - if err != nil { - level.Error(log).Log("msg", "could not load remote config, falling back to cache", "err", err) - return getCachedRemoteConfig(expandEnvVars, configProvider, fs, log) - } - - level.Info(log).Log("msg", "fetched and loaded remote config from API") - - if err = configProvider.CacheRemoteConfig(remoteConfigBytes); err != nil { - level.Error(log).Log("err", fmt.Errorf("could not cache config locally: %w", err)) - } - return config, nil -} - -// getCachedRemoteConfig gets the cached remote config, falling back to the default config if the cache is invalid or not found. 
-func getCachedRemoteConfig(expandEnvVars bool, configProvider remoteConfigProvider, fs *flag.FlagSet, log *server.Logger) (*Config, error) { - rc, err := configProvider.GetCachedRemoteConfig() - if err != nil { - level.Error(log).Log("msg", "could not get cached remote config, falling back to default (empty) config", "err", err) - d := DefaultConfig() - instrumentation.InstrumentAgentManagementConfigFallback("empty_config") - return &d, nil - } - instrumentation.InstrumentAgentManagementConfigFallback("cache") - return loadRemoteConfig(rc, expandEnvVars, fs) -} - -// loadRemoteConfig parses and validates the remote config, both syntactically and semantically. -func loadRemoteConfig(remoteConfigBytes []byte, expandEnvVars bool, fs *flag.FlagSet) (*Config, error) { - expandedRemoteConfigBytes, err := performEnvVarExpansion(remoteConfigBytes, expandEnvVars) - if err != nil { - instrumentation.InstrumentInvalidRemoteConfig("env_var_expansion") - return nil, fmt.Errorf("could not expand env vars for remote config: %w", err) - } - - remoteConfig, err := NewRemoteConfig(expandedRemoteConfigBytes) - if err != nil { - instrumentation.InstrumentInvalidRemoteConfig("invalid_yaml") - return nil, fmt.Errorf("could not unmarshal remote config: %w", err) - } - - config, err := remoteConfig.BuildAgentConfig() - if err != nil { - instrumentation.InstrumentInvalidRemoteConfig("invalid_remote_config") - return nil, fmt.Errorf("could not build agent config: %w", err) - } - - if err = config.Validate(fs); err != nil { - instrumentation.InstrumentInvalidRemoteConfig("semantically_invalid_agent_config") - return nil, fmt.Errorf("semantically invalid config received from the API: %w", err) - } - return config, nil -} - -// newRemoteConfigProvider creates a remoteConfigProvider based on the protocol -// specified in c.AgentManagement -func newRemoteConfigProvider(c *Config) (*remoteConfigHTTPProvider, error) { - switch p := c.AgentManagement.Protocol; { - case p == "https" || p == "http": - return newRemoteConfigHTTPProvider(c) - default: - return nil, fmt.Errorf("unsupported protocol for agent management api: %s", p) - } -} - -// fullUrl creates and returns the URL that should be used when querying the Agent Management API, -// including the namespace, base config id, and any labels that have been specified. -func (am *AgentManagementConfig) fullUrl() (string, error) { - fullPath, err := url.JoinPath(am.Protocol+"://", am.Host, apiPath, "namespace", am.RemoteConfiguration.Namespace, "remote_config") - if err != nil { - return "", fmt.Errorf("error trying to join url: %w", err) - } - u, err := url.Parse(fullPath) - if err != nil { - return "", fmt.Errorf("error trying to parse url: %w", err) - } - q := u.Query() - for label, value := range am.RemoteConfiguration.Labels { - q.Add(label, value) - } - u.RawQuery = q.Encode() - return u.String(), nil -} - -// SleepTime returns the duration in between config fetches. -func (am *AgentManagementConfig) SleepTime() time.Duration { - return am.PollingInterval -} - -// jitterTime returns a random duration in the range [0, am.PollingInterval). -func (am *AgentManagementConfig) JitterTime() time.Duration { - return time.Duration(rand.Int63n(int64(am.PollingInterval))) -} - // Validate checks that necessary portions of the config have been set. 
func (am *AgentManagementConfig) Validate() error { if am.HTTPClientConfig.BasicAuth == nil || am.HTTPClientConfig.BasicAuth.Username == "" || am.HTTPClientConfig.BasicAuth.PasswordFile == "" { diff --git a/internal/static/config/agentmanagement_remote_config.go b/internal/static/config/agentmanagement_remote_config.go deleted file mode 100644 index 6e658e7053..0000000000 --- a/internal/static/config/agentmanagement_remote_config.go +++ /dev/null @@ -1,179 +0,0 @@ -package config - -import ( - "bytes" - "text/template" - - "github.com/grafana/agent/internal/static/integrations" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/prometheus/common/model" - pc "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/model/labels" - "gopkg.in/yaml.v2" -) - -type ( - RemoteConfig struct { - BaseConfig BaseConfigContent `json:"base_config" yaml:"base_config"` - Snippets []Snippet `json:"snippets" yaml:"snippets"` - AgentMetadata AgentMetadata `json:"agent_metadata,omitempty" yaml:"agent_metadata,omitempty"` - } - - // BaseConfigContent is the content of a base config - BaseConfigContent string - - // Snippet is a snippet of configuration returned by the config API. - Snippet struct { - // Config is the snippet of config to be included. - Config string `json:"config" yaml:"config"` - } - - AgentMetadata struct { - ExternalLabels map[string]string `json:"external_labels,omitempty" yaml:"external_labels,omitempty"` - TemplateVariables map[string]any `json:"template_variables,omitempty" yaml:"template_variables,omitempty"` - } - - // SnippetContent defines the internal structure of a snippet configuration. - SnippetContent struct { - // MetricsScrapeConfigs is a YAML containing list of metrics scrape configs. - MetricsScrapeConfigs []*pc.ScrapeConfig `yaml:"metrics_scrape_configs,omitempty"` - - // LogsScrapeConfigs is a YAML containing list of logs scrape configs. - LogsScrapeConfigs []scrapeconfig.Config `yaml:"logs_scrape_configs,omitempty"` - - // IntegrationConfigs is a YAML containing list of integrations. 
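[For reviewers tracing what the deleted cache check did: a minimal, self-contained sketch of the same idea, hash the effective input and refuse a cache whose recorded hash differs. Names such as cachedEntry and checkCache are illustrative only, not part of the codebase.]

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
)

// cachedEntry mirrors the shape of the removed remoteConfigCache: the hash of
// the config that produced the cache, plus the cached payload itself.
type cachedEntry struct {
	InitialConfigHash string
	Config            string
}

// hashOf returns the hex-encoded SHA-256 of an arbitrary byte slice.
func hashOf(b []byte) string {
	sum := sha256.Sum256(b)
	return hex.EncodeToString(sum[:])
}

// checkCache rejects a cache entry written under a different initial config.
func checkCache(initialConfig []byte, e cachedEntry) error {
	if hashOf(initialConfig) != e.InitialConfigHash {
		return errors.New("cache was written for a different initial config")
	}
	return nil
}

func main() {
	cfg := []byte("host: localhost:1234\nprotocol: https\n")
	entry := cachedEntry{InitialConfigHash: hashOf(cfg), Config: "server: {}"}
	fmt.Println(checkCache(cfg, entry))                          // <nil>
	fmt.Println(checkCache([]byte("host: other"), entry) != nil) // true
}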
diff --git a/internal/static/config/agentmanagement_remote_config.go b/internal/static/config/agentmanagement_remote_config.go
deleted file mode 100644
index 6e658e7053..0000000000
--- a/internal/static/config/agentmanagement_remote_config.go
+++ /dev/null
@@ -1,179 +0,0 @@
-package config
-
-import (
-	"bytes"
-	"text/template"
-
-	"github.com/grafana/agent/internal/static/integrations"
-	"github.com/grafana/agent/internal/static/logs"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/grafana/loki/clients/pkg/promtail/scrapeconfig"
-	"github.com/prometheus/common/model"
-	pc "github.com/prometheus/prometheus/config"
-	"github.com/prometheus/prometheus/model/labels"
-	"gopkg.in/yaml.v2"
-)
-
-type (
-	RemoteConfig struct {
-		BaseConfig    BaseConfigContent `json:"base_config" yaml:"base_config"`
-		Snippets      []Snippet         `json:"snippets" yaml:"snippets"`
-		AgentMetadata AgentMetadata     `json:"agent_metadata,omitempty" yaml:"agent_metadata,omitempty"`
-	}
-
-	// BaseConfigContent is the content of a base config
-	BaseConfigContent string
-
-	// Snippet is a snippet of configuration returned by the config API.
-	Snippet struct {
-		// Config is the snippet of config to be included.
-		Config string `json:"config" yaml:"config"`
-	}
-
-	AgentMetadata struct {
-		ExternalLabels    map[string]string `json:"external_labels,omitempty" yaml:"external_labels,omitempty"`
-		TemplateVariables map[string]any    `json:"template_variables,omitempty" yaml:"template_variables,omitempty"`
-	}
-
-	// SnippetContent defines the internal structure of a snippet configuration.
-	SnippetContent struct {
-		// MetricsScrapeConfigs is a YAML containing list of metrics scrape configs.
-		MetricsScrapeConfigs []*pc.ScrapeConfig `yaml:"metrics_scrape_configs,omitempty"`
-
-		// LogsScrapeConfigs is a YAML containing list of logs scrape configs.
-		LogsScrapeConfigs []scrapeconfig.Config `yaml:"logs_scrape_configs,omitempty"`
-
-		// IntegrationConfigs is a YAML containing list of integrations.
-		IntegrationConfigs integrations.ManagerConfig `yaml:"integration_configs,omitempty"`
-	}
-)
-
-func NewRemoteConfig(buf []byte) (*RemoteConfig, error) {
-	rc := &RemoteConfig{}
-	err := yaml.Unmarshal(buf, rc)
-	if err != nil {
-		return nil, err
-	}
-	return rc, nil
-}
-
-// BuildAgentConfig builds an agent configuration from a base config and a list of snippets
-func (rc *RemoteConfig) BuildAgentConfig() (*Config, error) {
-	baseConfig, err := evaluateTemplate(string(rc.BaseConfig), rc.AgentMetadata.TemplateVariables)
-	if err != nil {
-		return nil, err
-	}
-
-	c := DefaultConfig()
-	err = yaml.Unmarshal([]byte(baseConfig), &c)
-	if err != nil {
-		return nil, err
-	}
-
-	// For now Agent Management only supports integrations v1
-	if err := c.Integrations.setVersion(IntegrationsVersion1); err != nil {
-		return nil, err
-	}
-
-	err = appendSnippets(&c, rc.Snippets, rc.AgentMetadata.TemplateVariables)
-	if err != nil {
-		return nil, err
-	}
-	appendExternalLabels(&c, rc.AgentMetadata.ExternalLabels)
-	return &c, nil
-}
-
-func appendSnippets(c *Config, snippets []Snippet, templateVars map[string]any) error {
-	metricsConfigs := instance.DefaultConfig
-	metricsConfigs.Name = "snippets"
-	logsConfigs := logs.InstanceConfig{
-		Name:         "snippets",
-		ScrapeConfig: []scrapeconfig.Config{},
-	}
-	logsConfigs.Initialize()
-	integrationConfigs := integrations.DefaultManagerConfig()
-
-	// Map used to identify if an integration is already configured and avoid overriding it
-	configuredIntegrations := map[string]bool{}
-	for _, itg := range c.Integrations.ConfigV1.Integrations {
-		configuredIntegrations[itg.Name()] = true
-	}
-
-	for _, snippet := range snippets {
-		snippetConfig, err := evaluateTemplate(snippet.Config, templateVars)
-		if err != nil {
-			return err
-		}
-
-		var snippetContent SnippetContent
-		err = yaml.Unmarshal([]byte(snippetConfig), &snippetContent)
-		if err != nil {
-			return err
-		}
-		metricsConfigs.ScrapeConfigs = append(metricsConfigs.ScrapeConfigs, snippetContent.MetricsScrapeConfigs...)
-		logsConfigs.ScrapeConfig = append(logsConfigs.ScrapeConfig, snippetContent.LogsScrapeConfigs...)
-
-		for _, snip := range snippetContent.IntegrationConfigs.Integrations {
-			if _, ok := configuredIntegrations[snip.Name()]; !ok {
-				integrationConfigs.Integrations = append(integrationConfigs.Integrations, snip)
-				configuredIntegrations[snip.Name()] = true
-			}
-		}
-	}
-
-	if len(metricsConfigs.ScrapeConfigs) > 0 {
-		c.Metrics.Configs = append(c.Metrics.Configs, metricsConfigs)
-	}
-
-	if len(logsConfigs.ScrapeConfig) > 0 {
-		// rc.Config.Logs is initialized as nil, so we need to check if it's nil before appending
-		if c.Logs == nil {
-			c.Logs = &logs.Config{
-				Configs: []*logs.InstanceConfig{},
-			}
-		}
-		c.Logs.Configs = append(c.Logs.Configs, &logsConfigs)
-	}
-
-	c.Integrations.ConfigV1.Integrations = append(c.Integrations.ConfigV1.Integrations, integrationConfigs.Integrations...)
-	return nil
-}
-
-func appendExternalLabels(c *Config, externalLabels map[string]string) {
-	// Avoid doing anything if there are no external labels
-	if len(externalLabels) == 0 {
-		return
-	}
-	// Start off with the existing external labels, which will only be added to (not replaced)
-	metricsExternalLabels := c.Metrics.Global.Prometheus.ExternalLabels.Map()
-	for k, v := range externalLabels {
-		if _, ok := metricsExternalLabels[k]; !ok {
-			metricsExternalLabels[k] = v
-		}
-	}
-
-	logsExternalLabels := make(model.LabelSet)
-	for k, v := range externalLabels {
-		logsExternalLabels[model.LabelName(k)] = model.LabelValue(v)
-	}
-
-	c.Metrics.Global.Prometheus.ExternalLabels = labels.FromMap(metricsExternalLabels)
-	for i, cc := range c.Logs.Global.ClientConfigs {
-		c.Logs.Global.ClientConfigs[i].ExternalLabels.LabelSet = logsExternalLabels.Merge(cc.ExternalLabels.LabelSet)
-	}
-}
-
-func evaluateTemplate(config string, templateVariables map[string]any) (string, error) {
-	tpl, err := template.New("config").Parse(config)
-	if err != nil {
-		return "", err
-	}
-
-	var buf bytes.Buffer
-	err = tpl.Execute(&buf, templateVariables)
-	if err != nil {
-		return "", err
-	}
-
-	return buf.String(), nil
-}
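[A compact illustration of the merge rule the removed appendExternalLabels implemented: labels already present in the base config win over metadata-supplied ones. This standalone sketch uses plain maps rather than the Prometheus label types, and all names in it are invented for illustration.]

package main

import "fmt"

// mergeKeepExisting copies entries from extra into base only when base does
// not already define the key, mirroring the "base config wins" rule.
func mergeKeepExisting(base, extra map[string]string) map[string]string {
	out := make(map[string]string, len(base)+len(extra))
	for k, v := range base {
		out[k] = v
	}
	for k, v := range extra {
		if _, exists := out[k]; !exists {
			out[k] = v
		}
	}
	return out
}

func main() {
	base := map[string]string{"foo": "bar"}
	extra := map[string]string{"foo": "baz", "env": "prod"}
	// foo stays "bar"; env is added.
	fmt.Println(mergeKeepExisting(base, extra))
}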
diff --git a/internal/static/config/agentmanagement_test.go b/internal/static/config/agentmanagement_test.go
index 2cae67cfa6..28da655d27 100644
--- a/internal/static/config/agentmanagement_test.go
+++ b/internal/static/config/agentmanagement_test.go
@@ -1,57 +1,14 @@
 package config
 
 import (
-	"crypto/sha256"
-	"encoding/hex"
-	"errors"
-	"flag"
 	"testing"
 	"time"
 
-	"github.com/grafana/agent/internal/static/config/features"
-	"github.com/grafana/agent/internal/static/server"
-	"github.com/grafana/agent/internal/util"
 	"github.com/prometheus/common/config"
 	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
 	"gopkg.in/yaml.v2"
 )
 
-// testRemoteConfigProvider is an implementation of remoteConfigProvider that can be
-// used for testing. It allows setting the values to return for both fetching the
-// remote config bytes & errors as well as the cached config & errors.
-type testRemoteConfigProvider struct {
-	InitialConfig *AgentManagementConfig
-
-	fetchedConfigBytesToReturn []byte
-	fetchedConfigErrorToReturn error
-	fetchRemoteConfigCallCount int
-
-	cachedConfigToReturn      []byte
-	cachedConfigErrorToReturn error
-	getCachedConfigCallCount  int
-	didCacheRemoteConfig      bool
-}
-
-func (t *testRemoteConfigProvider) GetCachedRemoteConfig() ([]byte, error) {
-	t.getCachedConfigCallCount += 1
-	return t.cachedConfigToReturn, t.cachedConfigErrorToReturn
-}
-
-func (t *testRemoteConfigProvider) FetchRemoteConfig() ([]byte, error) {
-	t.fetchRemoteConfigCallCount += 1
-	return t.fetchedConfigBytesToReturn, t.fetchedConfigErrorToReturn
-}
-
-func (t *testRemoteConfigProvider) CacheRemoteConfig(r []byte) error {
-	t.didCacheRemoteConfig = true
-	return nil
-}
-
-func (t *testRemoteConfigProvider) GetPollingInterval() time.Duration {
-	return t.InitialConfig.PollingInterval
-}
-
 var validAgentManagementConfig = AgentManagementConfig{
 	Enabled: true,
 	Host:    "localhost:1234",
@@ -71,8 +28,6 @@ var validAgentManagementConfig = AgentManagementConfig{
 	},
 }
 
-var cachedConfig = []byte(`{"base_config":"","snippets":[]}`)
-
 func TestUnmarshalDefault(t *testing.T) {
 	cfg := `host: "localhost:1234"
 protocol: "https"
@@ -162,418 +117,3 @@ func TestValidateLabelManagement(t *testing.T) {
 	cfg.RemoteConfiguration.AgentID = "test_agent_id"
 	assert.NoError(t, cfg.Validate())
 }
-
-func TestSleepTime(t *testing.T) {
-	cfg := `
-api_url: "http://localhost"
-basic_auth:
-  username: "initial_user"
-protocol: "http"
-polling_interval: "1m"
-remote_configuration:
-  namespace: "new_namespace"
-  cache_location: "/etc"`
-
-	var am AgentManagementConfig
-	yaml.Unmarshal([]byte(cfg), &am)
-	assert.Equal(t, time.Minute, am.SleepTime())
-}
-
-func TestFuzzJitterTime(t *testing.T) {
-	am := validAgentManagementConfig
-	pollingInterval := 2 * time.Minute
-	am.PollingInterval = pollingInterval
-
-	zero := time.Duration(0)
-
-	for i := 0; i < 10_000; i++ {
-		j := am.JitterTime()
-		assert.GreaterOrEqual(t, j, zero)
-		assert.Less(t, j, pollingInterval)
-	}
-}
-
-func TestFullUrl(t *testing.T) {
-	c := validAgentManagementConfig
-	actual, err := c.fullUrl()
-	assert.NoError(t, err)
-	assert.Equal(t, "https://localhost:1234/agent-management/api/agent/v2/namespace/test_namespace/remote_config?a=A&b=B", actual)
-}
-
-func TestRemoteConfigHashCheck(t *testing.T) {
-	// not a truly valid Agent Management config, but used for testing against
-	// precomputed sha256 hash
-	ic := AgentManagementConfig{
-		Protocol: "http",
-	}
-	marshalled, err := yaml.Marshal(ic)
-	require.NoError(t, err)
-	icHashBytes := sha256.Sum256(marshalled)
-	icHash := hex.EncodeToString(icHashBytes[:])
-
-	rcCache := remoteConfigCache{
-		InitialConfigHash: icHash,
-		Config:            "server:\\n log_level: debug",
-	}
-
-	require.NoError(t, initialConfigHashCheck(ic, rcCache))
-	rcCache.InitialConfigHash = "abc"
-	require.Error(t, initialConfigHashCheck(ic, rcCache))
-
-	differentIc := validAgentManagementConfig
-	require.Error(t, initialConfigHashCheck(differentIc, rcCache))
-}
-
-func TestNewRemoteConfigProvider_ValidInitialConfig(t *testing.T) {
-	invalidAgentManagementConfig := &AgentManagementConfig{
-		Enabled: true,
-		Host:    "localhost:1234",
-		HTTPClientConfig: config.HTTPClientConfig{
-			BasicAuth: &config.BasicAuth{
-				Username:     "test",
-				PasswordFile: "/test/path",
-			},
-		},
-		Protocol:        "https",
-		PollingInterval: time.Minute,
-		RemoteConfiguration: RemoteConfiguration{
-			Labels:        labelMap{"b": "B", "a": "A"},
-			Namespace:     "test_namespace",
-			CacheLocation: "/test/path/",
-		},
-	}
-
-	cfg := Config{
-		AgentManagement: *invalidAgentManagementConfig,
-	}
-	_, err := newRemoteConfigProvider(&cfg)
-	assert.NoError(t, err)
-}
-
-func TestNewRemoteConfigProvider_InvalidProtocol(t *testing.T) {
-	invalidAgentManagementConfig := &AgentManagementConfig{
-		Enabled: true,
-		Host:    "localhost:1234",
-		HTTPClientConfig: config.HTTPClientConfig{
-			BasicAuth: &config.BasicAuth{
-				Username:     "test",
-				PasswordFile: "/test/path",
-			},
-		},
-		Protocol:        "ws",
-		PollingInterval: time.Minute,
-		RemoteConfiguration: RemoteConfiguration{
-			Labels:        labelMap{"b": "B", "a": "A"},
-			Namespace:     "test_namespace",
-			CacheLocation: "/test/path/",
-		},
-	}
-
-	cfg := Config{
-		AgentManagement: *invalidAgentManagementConfig,
-	}
-	_, err := newRemoteConfigProvider(&cfg)
-	assert.Error(t, err)
-}
-
-func TestNewRemoteConfigHTTPProvider_InvalidInitialConfig(t *testing.T) {
-	// this is invalid because it is missing the password file
-	invalidAgentManagementConfig := &AgentManagementConfig{
-		Enabled: true,
-		Host:    "localhost:1234",
-		HTTPClientConfig: config.HTTPClientConfig{
-			BasicAuth: &config.BasicAuth{
-				Username: "test",
-			},
-		},
-		Protocol:        "https",
-		PollingInterval: time.Minute,
-		RemoteConfiguration: RemoteConfiguration{
-			Labels:        labelMap{"b": "B", "a": "A"},
-			Namespace:     "test_namespace",
-			CacheLocation: "/test/path/",
-		},
-	}
-
-	cfg := Config{
-		AgentManagement: *invalidAgentManagementConfig,
-	}
-	_, err := newRemoteConfigHTTPProvider(&cfg)
-	assert.Error(t, err)
-}
-
-func TestGetRemoteConfig_UnmarshallableRemoteConfig(t *testing.T) {
-	defaultCfg := DefaultConfig()
-	brokenCfg := `completely invalid config (maybe it got corrupted, maybe it was somehow set this way)`
-
-	invalidCfgBytes := []byte(brokenCfg)
-
-	am := validAgentManagementConfig
-	logger := server.NewLogger(defaultCfg.Server)
-	testProvider := testRemoteConfigProvider{InitialConfig: &am}
-	testProvider.fetchedConfigBytesToReturn = invalidCfgBytes
-	testProvider.cachedConfigToReturn = cachedConfig
-
-	// flagset is required because some default values are extracted from it.
-	// In addition, some flags are defined as dependencies for validation
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	features.Register(fs, allFeatures)
-	defaultCfg.RegisterFlags(fs)
-
-	cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false)
-	assert.NoError(t, err)
-	assert.False(t, testProvider.didCacheRemoteConfig)
-
-	// check that the returned config is the cached one
-	// Note: Validate is required for the comparison as it mutates the config
-	expected := defaultCfg
-	expected.Validate(fs)
-	assert.True(t, util.CompareYAML(*cfg, expected))
-}
-
-func TestGetRemoteConfig_RemoteFetchFails(t *testing.T) {
-	defaultCfg := DefaultConfig()
-
-	am := validAgentManagementConfig
-	logger := server.NewLogger(defaultCfg.Server)
-	testProvider := testRemoteConfigProvider{InitialConfig: &am}
-	testProvider.fetchedConfigErrorToReturn = errors.New("connection refused")
-	testProvider.cachedConfigToReturn = cachedConfig
-
-	// flagset is required because some default values are extracted from it.
-	// In addition, some flags are defined as dependencies for validation
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	features.Register(fs, allFeatures)
-	defaultCfg.RegisterFlags(fs)
-
-	cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false)
-	assert.NoError(t, err)
-	assert.False(t, testProvider.didCacheRemoteConfig)
-
-	// check that the returned config is the cached one
-	// Note: Validate is required for the comparison as it mutates the config
-	expected := defaultCfg
-	expected.Validate(fs)
-	assert.True(t, util.CompareYAML(*cfg, expected))
-}
-
-func TestGetRemoteConfig_SemanticallyInvalidBaseConfig(t *testing.T) {
-	defaultCfg := DefaultConfig()
-
-	// this is semantically invalid because it has two scrape_configs with
-	// the same job_name
-	invalidConfig := `
-{
-  "base_config": "metrics:\n configs:\n - name: Metrics Snippets\n scrape_configs:\n - job_name: 'prometheus'\n scrape_interval: 15s\n static_configs:\n - targets: ['localhost:12345']\n - job_name: 'prometheus'\n scrape_interval: 15s\n static_configs:\n - targets: ['localhost:12345']\n",
-  "snippets": []
-}`
-	invalidCfgBytes := []byte(invalidConfig)
-
-	am := validAgentManagementConfig
-	logger := server.NewLogger(defaultCfg.Server)
-	testProvider := testRemoteConfigProvider{InitialConfig: &am}
-	testProvider.fetchedConfigBytesToReturn = invalidCfgBytes
-	testProvider.cachedConfigToReturn = cachedConfig
-
-	// flagset is required because some default values are extracted from it.
-	// In addition, some flags are defined as dependencies for validation
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	features.Register(fs, allFeatures)
-	defaultCfg.RegisterFlags(fs)
-
-	cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false)
-	assert.NoError(t, err)
-	assert.False(t, testProvider.didCacheRemoteConfig)
-
-	// check that the returned config is the cached one
-	// Note: Validate is required for the comparison as it mutates the config
-	expected := defaultCfg
-	expected.Validate(fs)
-	assert.True(t, util.CompareYAML(*cfg, expected))
-}
-
-func TestGetRemoteConfig_InvalidSnippet(t *testing.T) {
-	defaultCfg := DefaultConfig()
-
-	// this is semantically invalid because it has two scrape_configs with
-	// the same job_name
-	invalidConfig := `
-{
-  "base_config": "server:\n log_level: info\n log_format: logfmt\n",
-  "snippets": [
-    {
-      "config": "metrics_scrape_configs:\n- job_name: 'prometheus'\n- job_name: 'prometheus'\n"
-    }
-  ]
-}`
-	invalidCfgBytes := []byte(invalidConfig)
-
-	am := validAgentManagementConfig
-	logger := server.NewLogger(defaultCfg.Server)
-	testProvider := testRemoteConfigProvider{InitialConfig: &am}
-	testProvider.fetchedConfigBytesToReturn = invalidCfgBytes
-	testProvider.cachedConfigToReturn = cachedConfig
-
-	// flagset is required because some default values are extracted from it.
-	// In addition, some flags are defined as dependencies for validation
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	features.Register(fs, allFeatures)
-	defaultCfg.RegisterFlags(fs)
-
-	cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false)
-	assert.NoError(t, err)
-	assert.False(t, testProvider.didCacheRemoteConfig)
-
-	// check that the returned config is the cached one
-	// Note: Validate is required for the comparison as it mutates the config
-	expected := defaultCfg
-	expected.Validate(fs)
-	assert.True(t, util.CompareYAML(*cfg, expected))
-}
-
-func TestGetRemoteConfig_EmptyBaseConfig(t *testing.T) {
-	defaultCfg := DefaultConfig()
-
-	validConfig := `
-{
-  "base_config": "",
-  "snippets": []
-}`
-	cfgBytes := []byte(validConfig)
-	am := validAgentManagementConfig
-	logger := server.NewLogger(defaultCfg.Server)
-	testProvider := testRemoteConfigProvider{InitialConfig: &am}
-	testProvider.fetchedConfigBytesToReturn = cfgBytes
-	testProvider.cachedConfigToReturn = cachedConfig
-
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	features.Register(fs, allFeatures)
-	defaultCfg.RegisterFlags(fs)
-
-	cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false)
-	assert.NoError(t, err)
-	assert.True(t, testProvider.didCacheRemoteConfig)
-
-	// check that the returned config is not the cached one
-	assert.NotEqual(t, "debug", cfg.Server.LogLevel.String())
-}
-
-func TestGetRemoteConfig_ValidBaseConfig(t *testing.T) {
-	defaultCfg := DefaultConfig()
-	validConfig := `
-{
-  "base_config": "server:\n log_level: debug\n log_format: logfmt\nlogs:\n positions_directory: /tmp\n global:\n clients:\n - basic_auth:\n password_file: key.txt\n username: 278220\n url: https://logs-prod-eu-west-0.grafana.net/loki/api/v1/push\nintegrations:\n agent:\n enabled: false\n",
-  "snippets": [
-    {
-      "config": "metrics_scrape_configs:\n- job_name: 'prometheus'\n scrape_interval: 15s\n static_configs:\n - targets: ['localhost:12345']\nlogs_scrape_configs:\n- job_name: yologs\n static_configs:\n - targets: [localhost]\n labels:\n job: yologs\n __path__: /tmp/yo.log\n",
-      "selector": {
-        "hostname": "machine-1",
-        "team": "team-a"
-      }
-    }
-  ]
-}`
-	cfgBytes := []byte(validConfig)
-	am := validAgentManagementConfig
-	logger := server.NewLogger(defaultCfg.Server)
-	testProvider := testRemoteConfigProvider{InitialConfig: &am}
-	testProvider.fetchedConfigBytesToReturn = cfgBytes
-	testProvider.cachedConfigToReturn = cachedConfig
-
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	features.Register(fs, allFeatures)
-	defaultCfg.RegisterFlags(fs)
-
-	cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false)
-	assert.NoError(t, err)
-	assert.True(t, testProvider.didCacheRemoteConfig)
-
-	// check that the returned config is not the cached one
-	assert.False(t, util.CompareYAML(*cfg, defaultCfg))
-
-	// check some fields to make sure the config was parsed correctly
-	assert.Equal(t, "debug", cfg.Server.LogLevel.String())
-	assert.Equal(t, "278220", cfg.Logs.Global.ClientConfigs[0].Client.BasicAuth.Username)
-	assert.Equal(t, "prometheus", cfg.Metrics.Configs[0].ScrapeConfigs[0].JobName)
-	assert.Equal(t, "yologs", cfg.Logs.Configs[0].ScrapeConfig[0].JobName)
-	assert.Equal(t, 1, len(cfg.Integrations.ConfigV1.Integrations))
-}
-
-func TestGetRemoteConfig_ExpandsEnvVars(t *testing.T) {
-	defaultCfg := DefaultConfig()
-	validConfig := `
-{
-  "base_config": "server:\n log_level: info\n log_format: ${LOG_FORMAT}\nlogs:\n positions_directory: /tmp\n global:\n clients:\n - basic_auth:\n password_file: key.txt\n username: 278220\n url: https://logs-prod-eu-west-0.grafana.net/loki/api/v1/push\nintegrations:\n agent:\n enabled: false\n",
-  "snippets": [
-    {
-      "config": "metrics_scrape_configs:\n- job_name: 'prometheus'\n scrape_interval: ${SCRAPE_INTERVAL}\n static_configs:\n - targets: ['localhost:12345']\n",
-      "selector": {
-        "hostname": "machine-1",
-        "team": "team-a"
-      }
-    }
-  ]
-}`
-	t.Setenv("SCRAPE_INTERVAL", "15s")
-	t.Setenv("LOG_FORMAT", "json")
-
-	cfgBytes := []byte(validConfig)
-	am := validAgentManagementConfig
-	logger := server.NewLogger(defaultCfg.Server)
-	testProvider := testRemoteConfigProvider{InitialConfig: &am}
-	testProvider.fetchedConfigBytesToReturn = cfgBytes
-	testProvider.cachedConfigToReturn = cachedConfig
-
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	var configExpandEnv bool
-	fs.BoolVar(&configExpandEnv, "config.expand-env", false, "")
-	features.Register(fs, allFeatures)
-	defaultCfg.RegisterFlags(fs)
-
-	cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false)
-	assert.NoError(t, err)
-	assert.Equal(t, "15s", cfg.Metrics.Configs[0].ScrapeConfigs[0].ScrapeInterval.String())
-	assert.Equal(t, "json", cfg.Server.LogFormat)
-}
-
-func TestGetCachedConfig_DefaultConfigFallback(t *testing.T) {
-	defaultCfg := DefaultConfig()
-	am := validAgentManagementConfig
-	logger := server.NewLogger(defaultCfg.Server)
-	testProvider := testRemoteConfigProvider{InitialConfig: &am}
-	testProvider.cachedConfigErrorToReturn = errors.New("no cached config")
-
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	features.Register(fs, allFeatures)
-	defaultCfg.RegisterFlags(fs)
-
-	cfg, err := getCachedRemoteConfig(true, &testProvider, fs, logger)
-	assert.NoError(t, err)
-
-	// check that the returned config is the default one
-	assert.True(t, util.CompareYAML(*cfg, defaultCfg))
-}
-
-func TestGetCachedConfig_RetryAfter(t *testing.T) {
-	defaultCfg := DefaultConfig()
-	am := validAgentManagementConfig
-	logger := server.NewLogger(defaultCfg.Server)
-	testProvider := testRemoteConfigProvider{InitialConfig: &am}
-	testProvider.fetchedConfigErrorToReturn = retryAfterError{retryAfter: time.Duration(0)}
-	testProvider.cachedConfigToReturn = cachedConfig
-
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	features.Register(fs, allFeatures)
-	defaultCfg.RegisterFlags(fs)
-
-	_, err := getRemoteConfig(true, &testProvider, logger, fs, true)
-	assert.NoError(t, err)
-	assert.False(t, testProvider.didCacheRemoteConfig)
-
-	// check that FetchRemoteConfig was called only once on the TestProvider
-	assert.Equal(t, 1, testProvider.fetchRemoteConfigCallCount)
-
-	// the cached config should have been retrieved once, on the second
-	// attempt to fetch the remote config
-	assert.Equal(t, 1, testProvider.getCachedConfigCallCount)
-}
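[The deleted tests above relied on a hand-rolled fake provider. The pattern generalizes: implement the interface with canned return values and call counters. A minimal sketch against a toy interface follows; every name in it is invented and it is not code from this patch.]

package main

import (
	"errors"
	"fmt"
)

// fetcher is a toy stand-in for the removed remoteConfigProvider interface.
type fetcher interface {
	Fetch() ([]byte, error)
}

// fakeFetcher returns canned values and counts calls, the same test-double
// shape the deleted testRemoteConfigProvider used.
type fakeFetcher struct {
	bytesToReturn []byte
	errToReturn   error
	calls         int
}

func (f *fakeFetcher) Fetch() ([]byte, error) {
	f.calls++
	return f.bytesToReturn, f.errToReturn
}

// fetchOrFallback is the code under test: try the fetcher, fall back on error.
func fetchOrFallback(f fetcher, fallback []byte) []byte {
	if b, err := f.Fetch(); err == nil {
		return b
	}
	return fallback
}

func main() {
	f := &fakeFetcher{errToReturn: errors.New("connection refused")}
	got := fetchOrFallback(f, []byte("cached"))
	fmt.Println(string(got), f.calls) // cached 1
}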
"github.com/grafana/agent/internal/static/traces" "github.com/grafana/agent/internal/util" - "github.com/prometheus/common/config" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) @@ -166,14 +162,6 @@ func (c Config) MarshalYAML() (interface{}, error) { return m, nil } -// LogDeprecations will log use of any deprecated fields to l as warn-level -// messages. -func (c *Config) LogDeprecations(l log.Logger) { - for _, d := range c.Deprecations { - level.Warn(l).Log("msg", fmt.Sprintf("DEPRECATION NOTICE: %s", d)) - } -} - // Validate validates the config, flags, and sets default values. func (c *Config) Validate(fs *flag.FlagSet) error { if c.Server == nil { @@ -243,101 +231,6 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&c.EnableConfigEndpoints, "config.enable-read-api", false, "Enables the /-/config and /agent/api/v1/configs/{name} APIs. Be aware that secrets could be exposed by enabling these endpoints!") } -// LoadFile reads a file and passes the contents to Load -func LoadFile(filename string, expandEnvVars bool, c *Config) error { - buf, err := os.ReadFile(filename) - if err != nil { - return fmt.Errorf("error reading config file %w", err) - } - instrumentation.InstrumentConfig(buf) - return LoadBytes(buf, expandEnvVars, c) -} - -// loadFromAgentManagementAPI loads and merges a config from an Agent Management API. -// 1. Read local initial config. -// 2. Get the remote config. -// a) Fetch from remote. If this fails or is invalid: -// b) Read the remote config from cache. If this fails, return an error. -// 4. Merge the initial and remote config into c. -func loadFromAgentManagementAPI(path string, expandEnvVars bool, c *Config, log *server.Logger, fs *flag.FlagSet) error { - // Load the initial config from disk without instrumenting the config hash - buf, err := os.ReadFile(path) - if err != nil { - return fmt.Errorf("error reading initial config file %w", err) - } - - err = LoadBytes(buf, expandEnvVars, c) - if err != nil { - return fmt.Errorf("failed to load initial config: %w", err) - } - - configProvider, err := newRemoteConfigProvider(c) - if err != nil { - return err - } - remoteConfig, err := getRemoteConfig(expandEnvVars, configProvider, log, fs, true) - if err != nil { - return err - } - mergeEffectiveConfig(c, remoteConfig) - - effectiveConfigBytes, err := yaml.Marshal(c) - if err != nil { - level.Warn(log).Log("msg", "error marshalling config for instrumenting config version", "err", err) - } else { - instrumentation.InstrumentConfig(effectiveConfigBytes) - } - - return nil -} - -// mergeEffectiveConfig overwrites any values in initialConfig with those in remoteConfig -func mergeEffectiveConfig(initialConfig *Config, remoteConfig *Config) { - initialConfig.Server = remoteConfig.Server - initialConfig.Metrics = remoteConfig.Metrics - initialConfig.Integrations = remoteConfig.Integrations - initialConfig.Traces = remoteConfig.Traces - initialConfig.Logs = remoteConfig.Logs -} - -// LoadRemote reads a config from url -func LoadRemote(url string, expandEnvVars bool, c *Config) error { - remoteOpts := &remoteOpts{} - if c.BasicAuthUser != "" && c.BasicAuthPassFile != "" { - remoteOpts.HTTPClientConfig = &config.HTTPClientConfig{ - BasicAuth: &config.BasicAuth{ - Username: c.BasicAuthUser, - PasswordFile: c.BasicAuthPassFile, - }, - } - } - - if remoteOpts.HTTPClientConfig != nil { - dir, err := os.Getwd() - if err != nil { - return fmt.Errorf("failed to get current working directory: %w", err) - } - remoteOpts.HTTPClientConfig.SetDirectory(dir) - } - 
- rc, err := newRemoteProvider(url, remoteOpts) - if err != nil { - return fmt.Errorf("error reading remote config: %w", err) - } - // fall back to file if no scheme is passed - if rc == nil { - return LoadFile(url, expandEnvVars, c) - } - bb, _, err := rc.retrieve() - if err != nil { - return fmt.Errorf("error retrieving remote config: %w", err) - } - - instrumentation.InstrumentConfig(bb) - - return LoadBytes(bb, expandEnvVars, c) -} - func performEnvVarExpansion(buf []byte, expandEnvVars bool) ([]byte, error) { utf8Buf, err := encoder.EnsureUTF8(buf, false) if err != nil { @@ -385,29 +278,6 @@ func getenv(name string) string { return os.Getenv(name) } -// Load loads a config file from a flagset. Flags will be registered -// to the flagset before parsing them with the values specified by -// args. -func Load(fs *flag.FlagSet, args []string, log *server.Logger) (*Config, error) { - cfg, error := LoadFromFunc(fs, args, func(path, fileType string, expandEnvVars bool, c *Config) error { - switch fileType { - case fileTypeYAML: - if features.Enabled(fs, featRemoteConfigs) { - return LoadRemote(path, expandEnvVars, c) - } - if features.Enabled(fs, featAgentManagement) { - return loadFromAgentManagementAPI(path, expandEnvVars, c, log, fs) - } - return LoadFile(path, expandEnvVars, c) - default: - return fmt.Errorf("unknown file type %q. accepted values: %s", fileType, strings.Join(fileTypes, ", ")) - } - }) - - instrumentation.InstrumentLoad(error == nil) - return cfg, error -} - type loaderFunc func(path string, fileType string, expandEnvVars bool, target *Config) error func applyIntegrationValuesFromFlagset(fs *flag.FlagSet, args []string, path string, cfg *Config) error { diff --git a/internal/static/config/config_test.go b/internal/static/config/config_test.go index 8b7e7aef72..364f2f2513 100644 --- a/internal/static/config/config_test.go +++ b/internal/static/config/config_test.go @@ -13,13 +13,11 @@ import ( "github.com/grafana/agent/internal/static/config/encoder" "github.com/grafana/agent/internal/static/metrics" "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/server" "github.com/grafana/agent/internal/util" commonCfg "github.com/prometheus/common/config" "github.com/prometheus/common/model" promCfg "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) @@ -457,71 +455,18 @@ func TestAgent_OmitEmptyFields(t *testing.T) { require.Equal(t, "{}\n", string(yml)) } -func TestAgentManagement_MergeEffectiveConfig(t *testing.T) { - initialCfg := ` -server: - log_level: info -logs: - positions_directory: /tmp -agent_management: - host: "localhost" - basic_auth: - username: "initial_user" - protocol: "http" - polling_interval: "1m" - remote_configuration: - namespace: "new_namespace" - cache_location: "/etc"` - - remoteCfg := ` -server: - log_level: debug -metrics: - wal_directory: /tmp - global: - scrape_interval: 5m -integrations: - scrape_integrations: true - -agent_management: - host: "localhost:80" - basic_auth: - username: "new_user" - protocol: "http" - polling_interval: "10s" - remote_configuration: - namespace: "new_namespace" - cache_location: "/etc"` - - var ic, rc Config - err := LoadBytes([]byte(initialCfg), false, &ic) - assert.NoError(t, err) - err = LoadBytes([]byte(remoteCfg), false, &rc) - assert.NoError(t, err) - - // keep a copy of the initial config's agent management block to ensure it 
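[The removed LoadRemote dispatched on the URL scheme and fell back to a plain file path when no scheme was present. A minimal sketch of that dispatch follows; the loader return strings are placeholders and the function names are invented for illustration.]

package main

import (
	"fmt"
	"net/url"
)

// loadConfig routes a config location to the right loader based on its
// scheme, treating scheme-less input as a local file path, as the removed
// LoadRemote/newRemoteProvider pair did.
func loadConfig(raw string) (string, error) {
	u, err := url.Parse(raw)
	if err != nil {
		return "", fmt.Errorf("parsing %q: %w", raw, err)
	}
	switch u.Scheme {
	case "":
		return "load from file: " + raw, nil // placeholder for a file loader
	case "http", "https":
		return "fetch over HTTP: " + raw, nil // placeholder for an HTTP loader
	default:
		return "", fmt.Errorf("unsupported scheme %q", u.Scheme)
	}
}

func main() {
	for _, loc := range []string{"/etc/agent.yml", "https://example.com/agent.yml", "ssh://nope"} {
		out, err := loadConfig(loc)
		fmt.Println(out, err)
	}
}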
diff --git a/internal/static/config/config_test.go b/internal/static/config/config_test.go
index 8b7e7aef72..364f2f2513 100644
--- a/internal/static/config/config_test.go
+++ b/internal/static/config/config_test.go
@@ -13,13 +13,11 @@ import (
 	"github.com/grafana/agent/internal/static/config/encoder"
 	"github.com/grafana/agent/internal/static/metrics"
 	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/grafana/agent/internal/static/server"
 	"github.com/grafana/agent/internal/util"
 	commonCfg "github.com/prometheus/common/config"
 	"github.com/prometheus/common/model"
 	promCfg "github.com/prometheus/prometheus/config"
 	"github.com/prometheus/prometheus/model/labels"
-	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"gopkg.in/yaml.v2"
 )
@@ -457,71 +455,18 @@ func TestAgent_OmitEmptyFields(t *testing.T) {
 	require.Equal(t, "{}\n", string(yml))
 }
 
-func TestAgentManagement_MergeEffectiveConfig(t *testing.T) {
-	initialCfg := `
-server:
-  log_level: info
-logs:
-  positions_directory: /tmp
-agent_management:
-  host: "localhost"
-  basic_auth:
-    username: "initial_user"
-  protocol: "http"
-  polling_interval: "1m"
-  remote_configuration:
-    namespace: "new_namespace"
-    cache_location: "/etc"`
-
-	remoteCfg := `
-server:
-  log_level: debug
-metrics:
-  wal_directory: /tmp
-  global:
-    scrape_interval: 5m
-integrations:
-  scrape_integrations: true
-
-agent_management:
-  host: "localhost:80"
-  basic_auth:
-    username: "new_user"
-  protocol: "http"
-  polling_interval: "10s"
-  remote_configuration:
-    namespace: "new_namespace"
-    cache_location: "/etc"`
-
-	var ic, rc Config
-	err := LoadBytes([]byte(initialCfg), false, &ic)
-	assert.NoError(t, err)
-	err = LoadBytes([]byte(remoteCfg), false, &rc)
-	assert.NoError(t, err)
-
-	// keep a copy of the initial config's agent management block to ensure it isn't
-	// overwritten by the remote config's
-	initialAgentManagement := ic.AgentManagement
-	mergeEffectiveConfig(&ic, &rc)
-
-	// agent_management configuration should not be overwritten by the remote config
-	assert.Equal(t, initialAgentManagement, ic.AgentManagement)
-
-	// since these elements are purposefully different for the previous portion of the test,
-	// unset them before comparing the rest of the config
-	ic.AgentManagement = AgentManagementConfig{}
-	rc.AgentManagement = AgentManagementConfig{}
-
-	assert.True(t, util.CompareYAML(ic, rc))
-}
-
 func TestConfig_EmptyServerConfigFails(t *testing.T) {
 	// Since we are testing defaults via config.Load, we need a file instead of a string.
 	// This test file has an empty server stanza, we expect default values out.
-	defaultServerCfg := server.DefaultConfig()
-	logger := server.NewLogger(&defaultServerCfg)
 	fs := flag.NewFlagSet("", flag.ExitOnError)
-	_, err := Load(fs, []string{"--config.file", "./testdata/server_empty.yml"}, logger)
+
+	_, err := LoadFromFunc(fs, []string{"--config.file", "./testdata/server_empty.yml"}, func(path, fileType string, expandEnvVars bool, target *Config) error {
+		bb, err := os.ReadFile(path)
+		if err != nil {
+			return err
+		}
+		return LoadBytes(bb, expandEnvVars, target)
+	})
 	require.Error(t, err)
 }
 
diff --git a/internal/static/config/integrations.go b/internal/static/config/integrations.go
index c4ebb70ec3..f0d2652e6d 100644
--- a/internal/static/config/integrations.go
+++ b/internal/static/config/integrations.go
@@ -4,15 +4,11 @@ import (
 	"fmt"
 	"reflect"
 
-	"github.com/go-kit/log"
-	"github.com/gorilla/mux"
 	v1 "github.com/grafana/agent/internal/static/integrations"
 	v2 "github.com/grafana/agent/internal/static/integrations/v2"
 	"github.com/grafana/agent/internal/static/metrics"
 	"github.com/grafana/agent/internal/static/server"
 	"github.com/grafana/agent/internal/util"
-	"github.com/prometheus/statsd_exporter/pkg/level"
-	"golang.org/x/exp/maps"
 	"gopkg.in/yaml.v2"
 )
 
@@ -129,64 +125,3 @@ func (c *VersionedIntegrations) setVersion(v IntegrationsVersion) error {
 		panic(fmt.Sprintf("unknown integrations version %d", c.Version))
 	}
 }
-
-// EnabledIntegrations returns a slice of enabled integrations
-func (c *VersionedIntegrations) EnabledIntegrations() []string {
-	integrations := map[string]struct{}{}
-	if c.ConfigV1 != nil {
-		for _, integration := range c.ConfigV1.Integrations {
-			integrations[integration.Name()] = struct{}{}
-		}
-	}
-	if c.ConfigV2 != nil {
-		for _, integration := range c.ConfigV2.Configs {
-			integrations[integration.Name()] = struct{}{}
-		}
-	}
-	return maps.Keys(integrations)
-}
-
-// IntegrationsGlobals is a global struct shared across integrations.
-type IntegrationsGlobals = v2.Globals
-
-// Integrations is an abstraction over both the v1 and v2 systems.
-type Integrations interface {
-	ApplyConfig(*VersionedIntegrations, IntegrationsGlobals) error
-	WireAPI(*mux.Router)
-	Stop()
-}
-
-// NewIntegrations creates a new subsystem. globals should be provided regardless
-// of useV2. globals.SubsystemOptions will be automatically set if cfg.Version
-// is set to IntegrationsVersion2.
-func NewIntegrations(logger log.Logger, cfg *VersionedIntegrations, globals IntegrationsGlobals) (Integrations, error) {
-	if cfg.Version != IntegrationsVersion2 {
-		instance, err := v1.NewManager(*cfg.ConfigV1, logger, globals.Metrics.InstanceManager(), globals.Metrics.Validate)
-		if err != nil {
-			return nil, err
-		}
-		return &v1Integrations{Manager: instance}, nil
-	}
-
-	level.Warn(logger).Log("msg", "integrations-next is enabled. integrations-next is subject to change")
-
-	globals.SubsystemOpts = *cfg.ConfigV2
-	instance, err := v2.NewSubsystem(logger, globals)
-	if err != nil {
-		return nil, err
-	}
-	return &v2Integrations{Subsystem: instance}, nil
-}
-
-type v1Integrations struct{ *v1.Manager }
-
-func (s *v1Integrations) ApplyConfig(cfg *VersionedIntegrations, _ IntegrationsGlobals) error {
-	return s.Manager.ApplyConfig(*cfg.ConfigV1)
-}
-
-type v2Integrations struct{ *v2.Subsystem }
-
-func (s *v2Integrations) ApplyConfig(cfg *VersionedIntegrations, globals IntegrationsGlobals) error {
-	globals.SubsystemOpts = *cfg.ConfigV2
-	return s.Subsystem.ApplyConfig(globals)
-}
diff --git a/internal/static/config/integrations_test.go b/internal/static/config/integrations_test.go
index b0854f3219..d3537997e0 100644
--- a/internal/static/config/integrations_test.go
+++ b/internal/static/config/integrations_test.go
@@ -2,7 +2,6 @@ package config
 
 import (
 	"flag"
-	"sort"
 	"testing"
 
 	"github.com/stretchr/testify/require"
@@ -46,74 +45,6 @@ integrations:
 	require.NotNil(t, c.Integrations.ConfigV2)
 }
 
-func TestEnabledIntegrations_v1(t *testing.T) {
-	cfg := `
-metrics:
-  wal_directory: /tmp/wal
-
-integrations:
-  agent:
-    enabled: true
-  node_exporter:
-    enabled: true`
-
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	c, err := LoadFromFunc(fs, []string{"-config.file", "test"}, func(_, _ string, _ bool, c *Config) error {
-		return LoadBytes([]byte(cfg), false, c)
-	})
-	require.NoError(t, err)
-
-	actual := c.Integrations.EnabledIntegrations()
-	sort.Strings(actual)
-	expected := []string{"agent", "node_exporter"}
-	sort.Strings(expected)
-	require.Equal(t, actual, expected)
-}
-
-func TestEnabledIntegrations_v2(t *testing.T) {
-	cfg := `
-metrics:
-  wal_directory: /tmp/wal
-
-integrations:
-  agent:
-    autoscrape:
-      enable: false
-  node_exporter:
-    autoscrape:
-      enable: false`
-
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	c, err := LoadFromFunc(fs, []string{"-config.file", "test", "-enable-features=integrations-next"}, func(_, _ string, _ bool, c *Config) error {
-		return LoadBytes([]byte(cfg), false, c)
-	})
-	require.NoError(t, err)
-
-	actual := c.Integrations.EnabledIntegrations()
-	sort.Strings(actual)
-	expected := []string{"agent", "node_exporter"}
-	sort.Strings(expected)
-	require.Equal(t, actual, expected)
-}
-
-func TestEnabledIntegrations_v2MultipleInstances(t *testing.T) {
-	cfg := `
-metrics:
-  wal_directory: /tmp/wal
-
-integrations:
-  redis_configs:
-  - redis_addr: "redis-0:6379"
-  - redis_addr: "redis-1:6379"`
-
-	fs := flag.NewFlagSet("test", flag.ExitOnError)
-	c, err := LoadFromFunc(fs, []string{"-config.file", "test", "-enable-features=integrations-next"}, func(_, _ string, _ bool, c *Config) error {
-		return LoadBytes([]byte(cfg), false, c)
-	})
-	require.NoError(t, err)
-	require.Equal(t, c.Integrations.EnabledIntegrations(), []string{"redis"})
-}
-
 func TestSetVersionDoesNotOverrideExistingV1Integrations(t *testing.T) {
 	cfg := `
 integrations:
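[The removed EnabledIntegrations deduplicated integration names through a map keyed by name, which is why the deleted tests sorted their results before comparing. A minimal standalone sketch of that dedupe-and-sort idea, with invented names:]

package main

import (
	"fmt"
	"sort"
)

// uniqueNames collapses duplicate integration names (for example several
// redis instances) into one entry, as the removed EnabledIntegrations did.
func uniqueNames(names []string) []string {
	set := map[string]struct{}{}
	for _, n := range names {
		set[n] = struct{}{}
	}
	out := make([]string, 0, len(set))
	for n := range set {
		out = append(out, n)
	}
	sort.Strings(out) // map iteration order is random; sort for stable output
	return out
}

func main() {
	fmt.Println(uniqueNames([]string{"redis", "redis", "agent"})) // [agent redis]
}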
diff --git a/internal/static/config/remote_config.go b/internal/static/config/remote_config.go
deleted file mode 100644
index 6b23c89ea1..0000000000
--- a/internal/static/config/remote_config.go
+++ /dev/null
@@ -1,145 +0,0 @@
-package config
-
-import (
-	"fmt"
-	"io"
-	"net/http"
-	"net/url"
-	"time"
-
-	"github.com/grafana/agent/internal/static/config/instrumentation"
-	"github.com/prometheus/common/config"
-)
-
-// supported remote config provider schemes
-const (
-	httpScheme  = "http"
-	httpsScheme = "https"
-)
-
-// remoteOpts struct contains agent remote config options
-type remoteOpts struct {
-	url              *url.URL
-	HTTPClientConfig *config.HTTPClientConfig
-	headers          map[string]string
-}
-
-// remoteProvider interface should be implemented by config providers
-type remoteProvider interface {
-	retrieve() ([]byte, http.Header, error)
-}
-
-// newRemoteProvider constructs a new remote configuration provider. The rawURL is parsed
-// and a provider is constructed based on the URL's scheme.
-func newRemoteProvider(rawURL string, opts *remoteOpts) (remoteProvider, error) {
-	u, err := url.Parse(rawURL)
-	if err != nil {
-		return nil, fmt.Errorf("error parsing rawURL %s: %w", rawURL, err)
-	}
-	if opts == nil {
-		// Default provider opts
-		opts = &remoteOpts{}
-	}
-	opts.url = u
-
-	switch u.Scheme {
-	case "":
-		// if no scheme, assume local file path, return nil and let caller handle.
-		return nil, nil
-	case httpScheme, httpsScheme:
-		httpP, err := newHTTPProvider(opts)
-		if err != nil {
-			return nil, fmt.Errorf("error constructing httpProvider: %w", err)
-		}
-		return httpP, nil
-	default:
-		return nil, fmt.Errorf("remote config scheme not supported: %s", u.Scheme)
-	}
-}
-
-// Remote Config Providers
-// httpProvider - http/https provider
-type httpProvider struct {
-	myURL      *url.URL
-	headers    map[string]string
-	httpClient *http.Client
-}
-
-// newHTTPProvider constructs a new httpProvider
-func newHTTPProvider(opts *remoteOpts) (*httpProvider, error) {
-	httpClientConfig := config.HTTPClientConfig{}
-	if opts.HTTPClientConfig != nil {
-		err := opts.HTTPClientConfig.Validate()
-		if err != nil {
-			return nil, err
-		}
-		httpClientConfig = *opts.HTTPClientConfig
-	}
-	httpClient, err := config.NewClientFromConfig(httpClientConfig, "remote-config")
-	if err != nil {
-		return nil, err
-	}
-	return &httpProvider{
-		myURL:      opts.url,
-		httpClient: httpClient,
-		headers:    opts.headers,
-	}, nil
-}
-
-type retryAfterError struct {
-	retryAfter time.Duration
-}
-
-func (r retryAfterError) Error() string {
-	return fmt.Sprintf("server indicated to retry after %s", r.retryAfter)
-}
-
-type notModifiedError struct{}
-
-func (n notModifiedError) Error() string {
-	return "server indicated no changes"
-}
-
-// retrieve implements remoteProvider and fetches the config
-func (p httpProvider) retrieve() ([]byte, http.Header, error) {
-	req, err := http.NewRequest(http.MethodGet, p.myURL.String(), nil)
-	if err != nil {
-		return nil, nil, fmt.Errorf("error creating request: %w", err)
-	}
-	for header, headerVal := range p.headers {
-		req.Header.Set(header, headerVal)
-	}
-	response, err := p.httpClient.Do(req)
-	if err != nil {
-		instrumentation.InstrumentRemoteConfigFetchError()
-		return nil, nil, fmt.Errorf("request failed: %w", err)
-	}
-	defer response.Body.Close()
-
-	instrumentation.InstrumentRemoteConfigFetch(response.StatusCode)
-
-	if response.StatusCode == http.StatusTooManyRequests || response.StatusCode == http.StatusServiceUnavailable {
-		retryAfter := response.Header.Get("Retry-After")
-		if retryAfter == "" {
-			return nil, nil, fmt.Errorf("server indicated to retry, but no Retry-After header was provided")
-		}
-		retryAfterDuration, err := time.ParseDuration(retryAfter)
-		if err != nil {
-			return nil, nil, fmt.Errorf("server indicated to retry, but Retry-After header was not a valid duration: %w", err)
-		}
-		return nil, nil, retryAfterError{retryAfter: retryAfterDuration}
-	}
-
-	if response.StatusCode == http.StatusNotModified {
-		return nil, nil, notModifiedError{}
-	}
-
-	if response.StatusCode/100 != 2 {
-		return nil, nil, fmt.Errorf("error fetching config: status code: %d", response.StatusCode)
-	}
-	bb, err := io.ReadAll(response.Body)
-	if err != nil {
-		return nil, nil, err
-	}
-	return bb, response.Header, nil
-}
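[Worth noting for anyone reusing this pattern: the deleted retrieve parsed Retry-After with time.ParseDuration, which accepts values like "30s" but not the bare delay-seconds form HTTP also allows. A small editorial sketch that tolerates both follows; it is an illustration, not code from the patch.]

package main

import (
	"fmt"
	"strconv"
	"time"
)

// parseRetryAfter accepts either a Go duration string ("30s") or the bare
// delay-seconds form ("30") that the HTTP spec allows for Retry-After.
func parseRetryAfter(v string) (time.Duration, error) {
	if secs, err := strconv.Atoi(v); err == nil {
		return time.Duration(secs) * time.Second, nil
	}
	return time.ParseDuration(v)
}

func main() {
	for _, v := range []string{"30", "30s", "bogus"} {
		d, err := parseRetryAfter(v)
		fmt.Println(v, d, err)
	}
}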
headers are returned", - args: args{ - rawURL: fmt.Sprintf("%s/agent.yml", svrWithHeaders.URL), - }, - want: []byte(testCfg), - wantErr: false, - wantHeaders: map[string][]string{ - "X-Test-Header": {"test"}, - "X-Other-Header": {"test2"}, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - rc, err := newRemoteProvider(tt.args.rawURL, tt.args.opts) - if tt.wantErr { - assert.Error(t, err) - return - } - assert.NoError(t, err) - bb, header, err := rc.retrieve() - assert.NoError(t, err) - assert.Equal(t, string(tt.want), string(bb)) - for k, v := range tt.wantHeaders { - assert.Equal(t, v, header[k]) - } - }) - } -} diff --git a/internal/static/integrations/cadvisor/cadvisor_stub.go b/internal/static/integrations/cadvisor/cadvisor_stub.go index 80a038b85d..35bdd3455b 100644 --- a/internal/static/integrations/cadvisor/cadvisor_stub.go +++ b/internal/static/integrations/cadvisor/cadvisor_stub.go @@ -3,7 +3,11 @@ package cadvisor //nolint:golint import ( + "context" + "net/http" + "github.com/grafana/agent/internal/static/integrations" + "github.com/grafana/agent/internal/static/integrations/config" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -12,5 +16,24 @@ import ( // NewIntegration creates a new cadvisor integration func (c *Config) NewIntegration(logger log.Logger) (integrations.Integration, error) { level.Warn(logger).Log("msg", "the cadvisor integration only works on linux; enabling it on other platforms will do nothing") - return &integrations.StubIntegration{}, nil + return &stubIntegration{}, nil +} + +// stubIntegration implements a no-op integration for use on platforms not supported by an integration +type stubIntegration struct{} + +// MetricsHandler returns an http.NotFoundHandler to satisfy the Integration interface +func (i *stubIntegration) MetricsHandler() (http.Handler, error) { + return http.NotFoundHandler(), nil +} + +// ScrapeConfigs returns an empty list of scrape configs, since there is nothing to scrape +func (i *stubIntegration) ScrapeConfigs() []config.ScrapeConfig { + return []config.ScrapeConfig{} +} + +// Run just waits for the context to finish +func (i *stubIntegration) Run(ctx context.Context) error { + <-ctx.Done() + return ctx.Err() } diff --git a/internal/static/integrations/manager.go b/internal/static/integrations/manager.go index de22be37e8..59760b9c7f 100644 --- a/internal/static/integrations/manager.go +++ b/internal/static/integrations/manager.go @@ -1,39 +1,18 @@ package integrations import ( - "context" "fmt" - "net/http" - "path" - "strings" - "sync" "time" config_util "github.com/prometheus/common/config" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/gorilla/mux" "github.com/grafana/agent/internal/static/metrics" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/metrics/instance/configstore" "github.com/grafana/agent/internal/static/server" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" promConfig "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/model/relabel" ) -var ( - integrationAbnormalExits = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "agent_metrics_integration_abnormal_exits_total", - Help: "Total number of times an agent integration exited unexpectedly, causing it to be restarted.", - }, 
[]string{"integration_name"}) -) - var CurrentManagerConfig ManagerConfig = DefaultManagerConfig() // DefaultManagerConfig holds the default settings for integrations. @@ -153,400 +132,3 @@ func (c *ManagerConfig) ApplyDefaults(sflags *server.Flags, mcfg *metrics.Config return nil } - -// Manager manages a set of integrations and runs them. -type Manager struct { - logger log.Logger - - cfgMut sync.RWMutex - cfg ManagerConfig - - hostname string - - ctx context.Context - cancel context.CancelFunc - wg sync.WaitGroup - - im instance.Manager - validator configstore.Validator - - integrationsMut sync.RWMutex - integrations map[string]*integrationProcess - - handlerMut sync.Mutex - handlerCache map[string]handlerCacheEntry -} - -// NewManager creates a new integrations manager. NewManager must be given an -// InstanceManager which is responsible for accepting instance configs to -// scrape and send metrics from running integrations. -func NewManager(cfg ManagerConfig, logger log.Logger, im instance.Manager, validate configstore.Validator) (*Manager, error) { - ctx, cancel := context.WithCancel(context.Background()) - - m := &Manager{ - logger: logger, - - ctx: ctx, - cancel: cancel, - - im: im, - validator: validate, - - integrations: make(map[string]*integrationProcess, len(cfg.Integrations)), - - handlerCache: make(map[string]handlerCacheEntry), - } - - var err error - m.hostname, err = instance.Hostname() - if err != nil { - return nil, err - } - - if err := m.ApplyConfig(cfg); err != nil { - return nil, fmt.Errorf("failed applying config: %w", err) - } - return m, nil -} - -// ApplyConfig updates the configuration of the integrations subsystem. -func (m *Manager) ApplyConfig(cfg ManagerConfig) error { - var failed bool - - m.cfgMut.Lock() - defer m.cfgMut.Unlock() - - m.integrationsMut.Lock() - defer m.integrationsMut.Unlock() - - // The global prometheus config settings don't get applied to integrations until later. This - // causes us to skip reload when those settings change. - if util.CompareYAML(m.cfg, cfg) && util.CompareYAML(m.cfg.PrometheusGlobalConfig, cfg.PrometheusGlobalConfig) { - level.Debug(m.logger).Log("msg", "Integrations config is unchanged skipping apply") - return nil - } - level.Debug(m.logger).Log("msg", "Applying integrations config changes") - - select { - case <-m.ctx.Done(): - return fmt.Errorf("Manager already stopped") - default: - // No-op - } - - // Iterate over our integrations. New or changed integrations will be - // started, with their existing counterparts being shut down. - for _, ic := range cfg.Integrations { - if !ic.Common.Enabled { - continue - } - // Key is used to identify the instance of this integration within the - // instance manager and within our set of running integrations. - key := integrationKey(ic.Name()) - - // Look for an existing integration with the same key. If it exists and - // is unchanged, we have nothing to do. Otherwise, we're going to recreate - // it with the new settings, so we'll need to stop it. - if p, exist := m.integrations[key]; exist { - if util.CompareYAMLWithHook(p.cfg, ic, noScrubbedSecretsHook) { - continue - } - p.stop() - delete(m.integrations, key) - } - - l := log.With(m.logger, "integration", ic.Name()) - i, err := ic.NewIntegration(l) - if err != nil { - level.Error(m.logger).Log("msg", "failed to initialize integration. 
it will not run or be scraped", "integration", ic.Name(), "err", err) - failed = true - - // If this integration was running before, its instance won't be cleaned - // up since it's now removed from the map. We need to clean it up here. - _ = m.im.DeleteConfig(key) - continue - } - - // Find what instance label should be used to represent this integration. - var instanceKey string - if kp := ic.Common.InstanceKey; kp != nil { - // Common config takes precedence. - instanceKey = strings.TrimSpace(*kp) - } else { - instanceKey, err = ic.InstanceKey(fmt.Sprintf("%s:%d", m.hostname, cfg.ListenPort)) - if err != nil { - level.Error(m.logger).Log("msg", "failed to get instance key for integration. it will not run or be scraped", "integration", ic.Name(), "err", err) - failed = true - - // If this integration was running before, its instance won't be cleaned - // up since it's now removed from the map. We need to clean it up here. - _ = m.im.DeleteConfig(key) - continue - } - } - - // Create, start, and register the new integration. - ctx, cancel := context.WithCancel(m.ctx) - p := &integrationProcess{ - log: m.logger, - cfg: ic, - i: i, - instanceKey: instanceKey, - - ctx: ctx, - stop: cancel, - - wg: &m.wg, - wait: m.instanceBackoff, - } - go p.Run() - m.integrations[key] = p - } - - // Delete instances and processed that have been removed in between calls to - // ApplyConfig. - for key, process := range m.integrations { - foundConfig := false - for _, ic := range cfg.Integrations { - if integrationKey(ic.Name()) == key { - // If this is disabled then we should delete from integrations - if !ic.Common.Enabled { - break - } - foundConfig = true - break - } - } - if foundConfig { - continue - } - - _ = m.im.DeleteConfig(key) - process.stop() - delete(m.integrations, key) - } - - // Re-apply configs to our instance manager for all running integrations. - // Generated scrape configs may change in between calls to ApplyConfig even - // if the configs for the integration didn't. - for key, p := range m.integrations { - shouldCollect := cfg.ScrapeIntegrations - if common := p.cfg.Common; common.ScrapeIntegration != nil { - shouldCollect = *common.ScrapeIntegration - } - - switch shouldCollect { - case true: - instanceConfig := m.instanceConfigForIntegration(p, cfg) - if err := m.validator(&instanceConfig); err != nil { - level.Error(p.log).Log("msg", "failed to validate generated scrape config for integration. integration will not be scraped", "err", err, "integration", p.cfg.Name()) - failed = true - break - } - - if err := m.im.ApplyConfig(instanceConfig); err != nil { - level.Error(p.log).Log("msg", "failed to apply integration. integration will not be scraped", "err", err, "integration", p.cfg.Name()) - failed = true - } - case false: - // If a previous instance of the config was being scraped, we need to - // delete it here. Calling DeleteConfig when nothing is running is a safe - // operation. - _ = m.im.DeleteConfig(key) - } - } - - m.cfg = cfg - - if failed { - return fmt.Errorf("not all integrations were correctly updated") - } - return nil -} - -func noScrubbedSecretsHook(in interface{}) (ok bool, out interface{}, err error) { - switch v := in.(type) { - case config_util.Secret: - return true, string(v), nil - case *config_util.URL: - return true, v.String(), nil - default: - return false, nil, nil - } -} - -// integrationProcess is a running integration. 
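// Each process owns one goroutine: Run keeps the integration alive, invoking
// the wait hook (the manager's restart backoff) after abnormal exits, until
// the stop func cancels its context.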
-type integrationProcess struct { - log log.Logger - ctx context.Context - stop context.CancelFunc - cfg UnmarshaledConfig - instanceKey string // Value for the `instance` label - i Integration - - wg *sync.WaitGroup - wait func(cfg Config, err error) -} - -// Run runs the integration until the process is canceled. -func (p *integrationProcess) Run() { - defer func() { - if r := recover(); r != nil { - err := fmt.Errorf("%v", r) - level.Error(p.log).Log("msg", "integration has panicked. THIS IS A BUG!", "err", err, "integration", p.cfg.Name()) - } - }() - - p.wg.Add(1) - defer p.wg.Done() - - for { - err := p.i.Run(p.ctx) - if err != nil && err != context.Canceled { - p.wait(p.cfg, err) - } else { - level.Info(p.log).Log("msg", "stopped integration", "integration", p.cfg.Name()) - break - } - } -} - -func (m *Manager) instanceBackoff(cfg Config, err error) { - m.cfgMut.RLock() - defer m.cfgMut.RUnlock() - - integrationAbnormalExits.WithLabelValues(cfg.Name()).Inc() - level.Error(m.logger).Log("msg", "integration stopped abnormally, restarting after backoff", "err", err, "integration", cfg.Name(), "backoff", m.cfg.IntegrationRestartBackoff) - time.Sleep(m.cfg.IntegrationRestartBackoff) -} - -func (m *Manager) instanceConfigForIntegration(p *integrationProcess, cfg ManagerConfig) instance.Config { - common := p.cfg.Common - relabelConfigs := append(cfg.DefaultRelabelConfigs(p.instanceKey), common.RelabelConfigs...) - - schema := "http" - // Check for HTTPS support - var httpClientConfig config_util.HTTPClientConfig - if cfg.ServerUsingTLS { - schema = "https" - httpClientConfig.TLSConfig = cfg.TLSConfig - } - - var scrapeConfigs []*promConfig.ScrapeConfig - - for _, isc := range p.i.ScrapeConfigs() { - sc := &promConfig.ScrapeConfig{ - JobName: fmt.Sprintf("integrations/%s", isc.JobName), - MetricsPath: path.Join("/integrations", p.cfg.Name(), isc.MetricsPath), - Params: isc.QueryParams, - Scheme: schema, - HonorLabels: false, - HonorTimestamps: true, - ScrapeInterval: model.Duration(common.ScrapeInterval), - ScrapeTimeout: model.Duration(common.ScrapeTimeout), - ServiceDiscoveryConfigs: m.scrapeServiceDiscovery(cfg), - RelabelConfigs: relabelConfigs, - MetricRelabelConfigs: common.MetricRelabelConfigs, - HTTPClientConfig: httpClientConfig, - } - - scrapeConfigs = append(scrapeConfigs, sc) - } - - instanceCfg := instance.DefaultConfig - instanceCfg.Name = integrationKey(p.cfg.Name()) - instanceCfg.ScrapeConfigs = scrapeConfigs - instanceCfg.RemoteWrite = cfg.PrometheusRemoteWrite - if common.WALTruncateFrequency > 0 { - instanceCfg.WALTruncateFrequency = common.WALTruncateFrequency - } - return instanceCfg -} - -// integrationKey returns the key for an integration Config, used for its -// instance name and name in the process cache. -func integrationKey(name string) string { - return fmt.Sprintf("integration/%s", name) -} - -func (m *Manager) scrapeServiceDiscovery(cfg ManagerConfig) discovery.Configs { - // A blank host somehow works, but it then requires a sever name to be set under tls. 
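// (Put differently: an empty ListenHost technically works, but TLS
// verification of the local scrape target would then require an explicit
// server name, so the default below pins the target to loopback.)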
- newHost := cfg.ListenHost - if newHost == "" { - newHost = "127.0.0.1" - } - localAddr := fmt.Sprintf("%s:%d", newHost, cfg.ListenPort) - labels := model.LabelSet{} - labels[model.LabelName("agent_hostname")] = model.LabelValue(m.hostname) - for k, v := range cfg.Labels { - labels[k] = v - } - - return discovery.Configs{ - discovery.StaticConfig{{ - Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(localAddr)}}, - Labels: labels, - }}, - } -} - -// WireAPI hooks up /metrics routes per-integration. -func (m *Manager) WireAPI(r *mux.Router) { - r.HandleFunc("/integrations/{name}/metrics", func(rw http.ResponseWriter, r *http.Request) { - m.integrationsMut.RLock() - defer m.integrationsMut.RUnlock() - - key := integrationKey(mux.Vars(r)["name"]) - handler := m.loadHandler(key) - handler.ServeHTTP(rw, r) - }) -} - -// loadHandler will perform a dynamic lookup of an HTTP handler for an -// integration. loadHandler should be called with a read lock on the -// integrations mutex. -func (m *Manager) loadHandler(key string) http.Handler { - m.handlerMut.Lock() - defer m.handlerMut.Unlock() - - // Search the integration by name to see if it's still running. - p, ok := m.integrations[key] - if !ok { - delete(m.handlerCache, key) - return http.NotFoundHandler() - } - - // Now look in the cache for a handler for the running process. - cacheEntry, ok := m.handlerCache[key] - if ok && cacheEntry.process == p { - return cacheEntry.handler - } - - // New integration process that hasn't been scraped before. Generate - // a handler for it and cache it. - handler, err := p.i.MetricsHandler() - if err != nil { - level.Error(m.logger).Log("msg", "could not create http handler for integration", "integration", p.cfg.Name(), "err", err) - return http.HandlerFunc(internalServiceError) - } - - cacheEntry = handlerCacheEntry{handler: handler, process: p} - m.handlerCache[key] = cacheEntry - return cacheEntry.handler -} - -func internalServiceError(w http.ResponseWriter, r *http.Request) { - http.Error(w, "500 Internal Server Error", http.StatusInternalServerError) -} - -// Stop stops the manager and all of its integrations. Blocks until all running -// integrations exit. -func (m *Manager) Stop() { - m.cancel() - m.wg.Wait() -} - -type handlerCacheEntry struct { - handler http.Handler - process *integrationProcess -} diff --git a/internal/static/integrations/manager_test.go b/internal/static/integrations/manager_test.go deleted file mode 100644 index e44dfb6c09..0000000000 --- a/internal/static/integrations/manager_test.go +++ /dev/null @@ -1,433 +0,0 @@ -package integrations - -import ( - "context" - "fmt" - "net/http" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/static/integrations/config" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/prometheus/common/model" - promConfig "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/relabel" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" - "gopkg.in/yaml.v2" -) - -const mockIntegrationName = "integration/mock" - -func noOpValidator(*instance.Config) error { return nil } - -// TestConfig_MarshalEmptyIntegrations ensures that an empty set of integrations -// can be marshaled correctly. 
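// The check is a YAML round-trip: unmarshal the text into a ManagerConfig,
// marshal it back, and require the output to be YAML-equivalent to the input.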
-func TestConfig_MarshalEmptyIntegrations(t *testing.T) { - cfgText := ` -scrape_integrations: true -replace_instance_label: true -integration_restart_backoff: 5s -use_hostname_label: true -` - var ( - cfg ManagerConfig - listenPort = 12345 - listenHost = "127.0.0.1" - ) - require.NoError(t, yaml.Unmarshal([]byte(cfgText), &cfg)) - - // Listen port must be set before applying defaults. Normally applied by the - // config package. - cfg.ListenPort = listenPort - cfg.ListenHost = listenHost - - outBytes, err := yaml.Marshal(cfg) - require.NoError(t, err, "Failed creating integration") - require.YAMLEq(t, cfgText, string(outBytes)) -} - -// Test that embedded integration fields in the struct can be unmarshaled and -// remarshaled back out to text. -func TestConfig_Remarshal(t *testing.T) { - RegisterIntegration(&testIntegrationA{}) - cfgText := ` -scrape_integrations: true -replace_instance_label: true -integration_restart_backoff: 5s -use_hostname_label: true -test: - text: Hello, world! - truth: true -` - var ( - cfg ManagerConfig - listenPort = 12345 - listenHost = "127.0.0.1" - ) - require.NoError(t, yaml.Unmarshal([]byte(cfgText), &cfg)) - - // Listen port must be set before applying defaults. Normally applied by the - // config package. - cfg.ListenPort = listenPort - cfg.ListenHost = listenHost - - outBytes, err := yaml.Marshal(cfg) - require.NoError(t, err, "Failed creating integration") - require.YAMLEq(t, cfgText, string(outBytes)) -} - -func TestConfig_AddressRelabels(t *testing.T) { - cfgText := ` -agent: - enabled: true -` - - var ( - cfg ManagerConfig - listenPort = 12345 - listenHost = "127.0.0.1" - ) - require.NoError(t, yaml.Unmarshal([]byte(cfgText), &cfg)) - - // Listen port must be set before applying defaults. Normally applied by the - // config package. - cfg.ListenPort = listenPort - cfg.ListenHost = listenHost - - expectHostname, _ := instance.Hostname() - relabels := cfg.DefaultRelabelConfigs(expectHostname + ":12345") - - // Ensure that the relabel configs are functional - require.Len(t, relabels, 1) - result, _ := relabel.Process(labels.FromStrings("__address__", "127.0.0.1"), relabels...) - - require.Equal(t, result.Get("instance"), expectHostname+":12345") -} - -func TestManager_instanceConfigForIntegration(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(mockManagerConfig(), log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - defer m.Stop() - - p := &integrationProcess{instanceKey: "key", cfg: makeUnmarshaledConfig(icfg, true), i: mock} - cfg := m.instanceConfigForIntegration(p, mockManagerConfig()) - - // Validate that the generated MetricsPath is a valid URL path - require.Len(t, cfg.ScrapeConfigs, 1) - require.Equal(t, "/integrations/mock/metrics", cfg.ScrapeConfigs[0].MetricsPath) -} - -func makeUnmarshaledConfig(cfg Config, enabled bool) UnmarshaledConfig { - return UnmarshaledConfig{Config: cfg, Common: config.Common{Enabled: enabled}} -} - -// TestManager_NoIntegrationsScrape ensures that configs don't get generates -// when the ScrapeIntegrations flag is disabled. 
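// In other words, no instance configs should be generated: with scraping
// disabled globally, the instance manager must stay empty.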
-func TestManager_NoIntegrationsScrape(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - - cfg := mockManagerConfig() - cfg.ScrapeIntegrations = false - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(&icfg, true)) - - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - defer m.Stop() - - // Normally we'd use util.Eventually here, but since im.ListConfigs starts - // out with a length of zero, util.Eventually would immediately pass. Instead - // we want to wait for a bit to make sure that the length of ListConfigs - // doesn't become non-zero. - time.Sleep(time.Second) - require.Zero(t, len(im.ListConfigs())) -} - -// TestManager_NoIntegrationScrape ensures that configs don't get generates -// when the ScrapeIntegration flag is disabled on the integration. -func TestManager_NoIntegrationScrape(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - noScrape := false - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, UnmarshaledConfig{ - Config: icfg, - Common: config.Common{ScrapeIntegration: &noScrape}, - }) - - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - defer m.Stop() - - time.Sleep(time.Second) - require.Zero(t, len(im.ListConfigs())) -} - -// TestManager_StartsIntegrations tests that, when given an integration to -// launch, TestManager applies a config and runs the integration. -func TestManager_StartsIntegrations(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(icfg, true)) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - defer m.Stop() - - util.Eventually(t, func(t require.TestingT) { - require.Equal(t, 1, len(im.ListConfigs())) - }) - - // Check that the instance was set to run - util.Eventually(t, func(t require.TestingT) { - require.Equal(t, 1, int(mock.startedCount.Load())) - }) -} - -func TestManager_RestartsIntegrations(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(icfg, true)) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - defer m.Stop() - - mock.err <- fmt.Errorf("I can't believe this horrible error happened") - - util.Eventually(t, func(t require.TestingT) { - require.Equal(t, 2, int(mock.startedCount.Load())) - }) -} - -func TestManager_GracefulStop(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(icfg, true)) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - - 
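// Wait for the integration to come up before asking the manager to stop.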
util.Eventually(t, func(t require.TestingT) { - require.Equal(t, 1, int(mock.startedCount.Load())) - }) - - m.Stop() - - time.Sleep(500 * time.Millisecond) - require.Equal(t, 1, int(mock.startedCount.Load()), "graceful shutdown should not have restarted the Integration") - - util.Eventually(t, func(t require.TestingT) { - require.Equal(t, false, mock.running.Load()) - }) -} - -func TestManager_IntegrationEnabledToDisabledReload(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(icfg, true)) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - - // Test for Enabled -> Disabled - _ = m.ApplyConfig(generateMockConfigWithEnabledFlag(false)) - require.Len(t, m.integrations, 0, "Integration was disabled so should be removed from map") - _, err = m.im.GetInstance(mockIntegrationName) - require.Error(t, err, "This mock should not exist") - - // test for Disabled -> Enabled - _ = m.ApplyConfig(generateMockConfigWithEnabledFlag(true)) - require.Len(t, m.integrations, 1, "Integration was enabled so should be here") - _, err = m.im.GetInstance(mockIntegrationName) - require.NoError(t, err, "This mock should exist") - require.Len(t, m.im.ListInstances(), 1, "This instance should exist") -} - -func TestManager_IntegrationDisabledToEnabledReload(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, UnmarshaledConfig{ - Config: icfg, - Common: config.Common{Enabled: false}, - }) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - require.Len(t, m.integrations, 0, "Integration was disabled so should be removed from map") - _, err = m.im.GetInstance(mockIntegrationName) - require.Error(t, err, "This mock should not exist") - - // test for Disabled -> Enabled - - _ = m.ApplyConfig(generateMockConfigWithEnabledFlag(true)) - require.Len(t, m.integrations, 1, "Integration was enabled so should be here") - _, err = m.im.GetInstance(mockIntegrationName) - require.NoError(t, err, "This mock should exist") - require.Len(t, m.im.ListInstances(), 1, "This instance should exist") -} - -type PromDefaultsValidator struct { - PrometheusGlobalConfig promConfig.GlobalConfig -} - -func (i *PromDefaultsValidator) validate(c *instance.Config) error { - instanceConfig := instance.GlobalConfig{ - Prometheus: i.PrometheusGlobalConfig, - } - return c.ApplyDefaults(instanceConfig) -} - -func TestManager_PromConfigChangeReloads(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(icfg, true)) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - - startingPromConfig := mockPromConfigWithValues(model.Duration(30*time.Second), model.Duration(25*time.Second)) - cfg.PrometheusGlobalConfig = startingPromConfig - validator := PromDefaultsValidator{startingPromConfig} - - m, err := NewManager(cfg, log.NewNopLogger(), im, validator.validate) - require.NoError(t, err) - require.Len(t, m.im.ListConfigs(), 1, 
"Integration was enabled so should be here") - //The integration never has the prom config overrides happen so go after the running instance config instead - for _, c := range m.im.ListConfigs() { - for _, scrape := range c.ScrapeConfigs { - require.Equal(t, startingPromConfig.ScrapeInterval, scrape.ScrapeInterval) - require.Equal(t, startingPromConfig.ScrapeTimeout, scrape.ScrapeTimeout) - } - } - - newPromConfig := mockPromConfigWithValues(model.Duration(60*time.Second), model.Duration(55*time.Second)) - cfg.PrometheusGlobalConfig = newPromConfig - validator.PrometheusGlobalConfig = newPromConfig - - err = m.ApplyConfig(cfg) - require.NoError(t, err) - - require.Len(t, m.im.ListConfigs(), 1, "Integration was enabled so should be here") - //The integration never has the prom config overrides happen so go after the running instance config instead - for _, c := range m.im.ListConfigs() { - for _, scrape := range c.ScrapeConfigs { - require.Equal(t, newPromConfig.ScrapeInterval, scrape.ScrapeInterval) - require.Equal(t, newPromConfig.ScrapeTimeout, scrape.ScrapeTimeout) - } - } -} - -func generateMockConfigWithEnabledFlag(enabled bool) ManagerConfig { - enabledMock := newMockIntegration() - enabledConfig := mockConfig{Integration: enabledMock} - enabledManagerConfig := mockManagerConfig() - enabledManagerConfig.Integrations = append( - enabledManagerConfig.Integrations, - makeUnmarshaledConfig(enabledConfig, enabled), - ) - return enabledManagerConfig -} - -type mockConfig struct { - Integration *mockIntegration `yaml:"mock"` -} - -// Equal is used for cmp.Equal, since otherwise mockConfig can't be compared to itself. -func (c mockConfig) Equal(other mockConfig) bool { return c.Integration == other.Integration } - -func (c mockConfig) Name() string { return "mock" } -func (c mockConfig) InstanceKey(agentKey string) (string, error) { return agentKey, nil } - -func (c mockConfig) NewIntegration(_ log.Logger) (Integration, error) { - return c.Integration, nil -} - -type mockIntegration struct { - startedCount *atomic.Uint32 - running *atomic.Bool - err chan error -} - -func newMockIntegration() *mockIntegration { - return &mockIntegration{ - running: atomic.NewBool(true), - startedCount: atomic.NewUint32(0), - err: make(chan error), - } -} - -func (i *mockIntegration) MetricsHandler() (http.Handler, error) { - return promhttp.Handler(), nil -} - -func (i *mockIntegration) ScrapeConfigs() []config.ScrapeConfig { - return []config.ScrapeConfig{{ - JobName: "mock", - MetricsPath: "/metrics", - }} -} - -func (i *mockIntegration) Run(ctx context.Context) error { - i.startedCount.Inc() - i.running.Store(true) - defer i.running.Store(false) - - select { - case <-ctx.Done(): - return ctx.Err() - case err := <-i.err: - return err - } -} - -func mockInstanceFactory(_ instance.Config) (instance.ManagedInstance, error) { - return instance.NoOpInstance{}, nil -} - -func mockManagerConfig() ManagerConfig { - listenPort := 0 - listenHost := "127.0.0.1" - return ManagerConfig{ - ScrapeIntegrations: true, - IntegrationRestartBackoff: 0, - ListenPort: listenPort, - ListenHost: listenHost, - } -} - -func mockPromConfigWithValues(scrapeInterval model.Duration, scrapeTimeout model.Duration) promConfig.GlobalConfig { - return promConfig.GlobalConfig{ - ScrapeInterval: scrapeInterval, - ScrapeTimeout: scrapeTimeout, - } -} diff --git a/internal/static/integrations/stub_integration.go b/internal/static/integrations/stub_integration.go deleted file mode 100644 index 2d118ff82c..0000000000 --- 
a/internal/static/integrations/stub_integration.go +++ /dev/null @@ -1,27 +0,0 @@ -package integrations - -import ( - "context" - "net/http" - - "github.com/grafana/agent/internal/static/integrations/config" -) - -// StubIntegration implements a no-op integration for use on platforms not supported by an integration -type StubIntegration struct{} - -// MetricsHandler returns an http.NotFoundHandler to satisfy the Integration interface -func (i *StubIntegration) MetricsHandler() (http.Handler, error) { - return http.NotFoundHandler(), nil -} - -// ScrapeConfigs returns an empty list of scrape configs, since there is nothing to scrape -func (i *StubIntegration) ScrapeConfigs() []config.ScrapeConfig { - return []config.ScrapeConfig{} -} - -// Run just waits for the context to finish -func (i *StubIntegration) Run(ctx context.Context) error { - <-ctx.Done() - return ctx.Err() -} diff --git a/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver.go b/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver.go index f1bdd00adb..9145115fd5 100644 --- a/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver.go +++ b/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver.go @@ -1,191 +1,17 @@ package app_agent_receiver //nolint:golint import ( - "context" "fmt" - "net/http" "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/gorilla/mux" "github.com/grafana/agent/internal/static/integrations/v2" - "github.com/grafana/agent/internal/static/integrations/v2/metricsutils" - "github.com/grafana/agent/internal/static/traces/pushreceiver" - "github.com/grafana/dskit/instrument" - "github.com/grafana/dskit/middleware" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/consumer" ) -type appAgentReceiverIntegration struct { - integrations.MetricsIntegration - appAgentReceiverHandler AppAgentReceiverHandler - logger log.Logger - conf *Config - reg prometheus.Registerer - - requestDurationCollector *prometheus.HistogramVec - receivedMessageSizeCollector *prometheus.HistogramVec - sentMessageSizeCollector *prometheus.HistogramVec - inflightRequestsCollector *prometheus.GaugeVec +func init() { + integrations.Register(&Config{}, integrations.TypeMultiplex) } -// Static typecheck tests -var ( - _ integrations.Integration = (*appAgentReceiverIntegration)(nil) - _ integrations.HTTPIntegration = (*appAgentReceiverIntegration)(nil) - _ integrations.MetricsIntegration = (*appAgentReceiverIntegration)(nil) -) - // NewIntegration converts this config into an instance of an integration func (c *Config) NewIntegration(l log.Logger, globals integrations.Globals) (integrations.Integration, error) { - reg := prometheus.NewRegistry() - sourcemapLogger := log.With(l, "subcomponent", "sourcemaps") - sourcemapStore := NewSourceMapStore(sourcemapLogger, c.SourceMaps, reg, nil, nil) - - receiverMetricsExporter := NewReceiverMetricsExporter(reg) - - var exp = []AppAgentReceiverExporter{ - receiverMetricsExporter, - } - - if len(c.LogsInstance) > 0 { - getLogsInstance := func() (logsInstance, error) { - instance := globals.Logs.Instance(c.LogsInstance) - if instance == nil { - return nil, fmt.Errorf("logs instance \"%s\" not found", c.LogsInstance) - } - return instance, nil - } - - if _, err := getLogsInstance(); err != nil { - return nil, err - } - - lokiExporter := NewLogsExporter( - l, - LogsExporterConfig{ - 
GetLogsInstance: getLogsInstance, - Labels: c.LogsLabels, - SendEntryTimeout: c.LogsSendTimeout, - }, - sourcemapStore, - ) - exp = append(exp, lokiExporter) - } - - if len(c.TracesInstance) > 0 { - getTracesConsumer := func() (consumer.Traces, error) { - tracesInstance := globals.Tracing.Instance(c.TracesInstance) - if tracesInstance == nil { - return nil, fmt.Errorf("traces instance \"%s\" not found", c.TracesInstance) - } - factory := tracesInstance.GetFactory(component.KindReceiver, pushreceiver.TypeStr) - if factory == nil { - return nil, fmt.Errorf("push receiver factory not found for traces instance \"%s\"", c.TracesInstance) - } - consumer := factory.(*pushreceiver.Factory).Consumer - if consumer == nil { - return nil, fmt.Errorf("consumer not set for push receiver factory on traces instance \"%s\"", c.TracesInstance) - } - return consumer, nil - } - if _, err := getTracesConsumer(); err != nil { - return nil, err - } - tracesExporter := NewTracesExporter(getTracesConsumer) - exp = append(exp, tracesExporter) - } - - handler := NewAppAgentReceiverHandler(c, exp, reg) - - metricsIntegration, err := metricsutils.NewMetricsHandlerIntegration(l, c, c.Common, globals, promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) - if err != nil { - return nil, err - } - - requestDurationCollector := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "app_agent_receiver_request_duration_seconds", - Help: "Time (in seconds) spent serving HTTP requests.", - Buckets: instrument.DefBuckets, - }, []string{"method", "route", "status_code", "ws"}) - reg.MustRegister(requestDurationCollector) - - receivedMessageSizeCollector := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "app_agent_receiver_request_message_bytes", - Help: "Size (in bytes) of messages received in the request.", - Buckets: middleware.BodySizeBuckets, - }, []string{"method", "route"}) - reg.MustRegister(receivedMessageSizeCollector) - - sentMessageSizeCollector := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "app_agent_receiver_response_message_bytes", - Help: "Size (in bytes) of messages sent in response.", - Buckets: middleware.BodySizeBuckets, - }, []string{"method", "route"}) - reg.MustRegister(sentMessageSizeCollector) - - inflightRequestsCollector := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "app_agent_receiver_inflight_requests", - Help: "Current number of inflight requests.", - }, []string{"method", "route"}) - reg.MustRegister(inflightRequestsCollector) - - return &appAgentReceiverIntegration{ - MetricsIntegration: metricsIntegration, - appAgentReceiverHandler: handler, - logger: l, - conf: c, - reg: reg, - - requestDurationCollector: requestDurationCollector, - receivedMessageSizeCollector: receivedMessageSizeCollector, - sentMessageSizeCollector: sentMessageSizeCollector, - inflightRequestsCollector: inflightRequestsCollector, - }, nil -} - -// RunIntegration implements Integration -func (i *appAgentReceiverIntegration) RunIntegration(ctx context.Context) error { - r := mux.NewRouter() - r.Handle("/collect", i.appAgentReceiverHandler.HTTPHandler(i.logger)).Methods("POST", "OPTIONS") - - mw := middleware.Instrument{ - RouteMatcher: r, - Duration: i.requestDurationCollector, - RequestBodySize: i.receivedMessageSizeCollector, - ResponseBodySize: i.sentMessageSizeCollector, - InflightRequests: i.inflightRequestsCollector, - } - - srv := &http.Server{ - Addr: fmt.Sprintf("%s:%d", i.conf.Server.Host, i.conf.Server.Port), - Handler: mw.Wrap(r), - } - errChan := make(chan error, 1) - - 
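// Serve in the background; ListenAndServe failures are surfaced through
// errChan so the select below can propagate them.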
go func() { - level.Info(i.logger).Log("msg", "starting app agent receiver", "host", i.conf.Server.Host, "port", i.conf.Server.Port) - if err := srv.ListenAndServe(); err != http.ErrServerClosed { - errChan <- err - } - }() - - select { - case <-ctx.Done(): - if err := srv.Shutdown(ctx); err != nil { - return err - } - case err := <-errChan: - close(errChan) - return err - } - - return nil -} - -func init() { - integrations.Register(&Config{}, integrations.TypeMultiplex) + return nil, fmt.Errorf("app_agent_receiver integration code has been replaced by faro.receiver component") } diff --git a/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver_test.go b/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver_test.go deleted file mode 100644 index f44db4c038..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver_test.go +++ /dev/null @@ -1,169 +0,0 @@ -package app_agent_receiver - -import ( - "bytes" - "context" - "fmt" - "io" - "net/http" - "strings" - "testing" - "time" - - "github.com/grafana/agent/internal/static/integrations/v2" - "github.com/grafana/agent/internal/static/server" - "github.com/grafana/agent/internal/static/traces" - "github.com/grafana/agent/internal/static/traces/traceutils" - "github.com/grafana/agent/internal/util" - "github.com/phayes/freeport" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.opentelemetry.io/collector/pdata/ptrace" - "gopkg.in/yaml.v2" -) - -func Test_ReceiveTracesAndRemoteWrite(t *testing.T) { - var err error - - // - // Prepare the traces instance - // - tracesCh := make(chan ptrace.Traces) - tracesAddr := traceutils.NewTestServer(t, func(t ptrace.Traces) { - tracesCh <- t - }) - - tracesCfgText := util.Untab(fmt.Sprintf(` -configs: -- name: TEST_TRACES - receivers: - jaeger: - protocols: - thrift_compact: - remote_write: - - endpoint: %s - insecure: true - batch: - timeout: 100ms - send_batch_size: 1 - `, tracesAddr)) - - var tracesCfg traces.Config - dec := yaml.NewDecoder(strings.NewReader(tracesCfgText)) - dec.SetStrict(true) - err = dec.Decode(&tracesCfg) - require.NoError(t, err) - - traces, err := traces.New(nil, nil, prometheus.NewRegistry(), tracesCfg, &server.HookLogger{}) - require.NoError(t, err) - t.Cleanup(traces.Stop) - - // - // Prepare the app_agent_receiver integration - // - integrationPort, err := freeport.GetFreePort() - require.NoError(t, err) - - var integrationCfg Config - cb := fmt.Sprintf(` -instance: TEST_APP_AGENT_RECEIVER -server: - cors_allowed_origins: - - '*' - host: '0.0.0.0' - max_allowed_payload_size: 5e+07 - port: %d - rate_limiting: - burstiness: 100 - enabled: true - rps: 100 -sourcemaps: - download: true -traces_instance: TEST_TRACES -`, integrationPort) - err = yaml.Unmarshal([]byte(cb), &integrationCfg) - require.NoError(t, err) - - logger := util.TestLogger(t) - globals := integrations.Globals{ - Tracing: traces, - } - - integration, err := integrationCfg.NewIntegration(logger, globals) - require.NoError(t, err) - - ctx := context.Background() - t.Cleanup(func() { ctx.Done() }) - // - // Start the app_agent_receiver integration - // - go func() { - err = integration.RunIntegration(ctx) - require.NoError(t, err) - }() - - // - // Send data to the integration's /collect endpoint - // - const PAYLOAD = ` -{ - "traces": { - "resourceSpans": [{ - "scopeSpans": [{ - "spans": [{ - "name": "TestSpan", - "attributes": [{ - "key": "foo", - "value": { "intValue": 
"11111" } - }, - { - "key": "boo", - "value": { "intValue": "22222" } - }, - { - "key": "user.email", - "value": { "stringValue": "user@email.com" } - }] - }] - }] - }] - }, - "logs": [], - "exceptions": [], - "measurements": [], - "meta": {} -} -` - - integrationURL := fmt.Sprintf("http://127.0.0.1:%d/collect", integrationPort) - - var httpResponse *http.Response - require.EventuallyWithT(t, func(c *assert.CollectT) { - req, err := http.NewRequest("POST", integrationURL, bytes.NewBuffer([]byte(PAYLOAD))) - assert.NoError(c, err) - - httpResponse, err = http.DefaultClient.Do(req) - assert.NoError(c, err) - }, 5*time.Second, 250*time.Millisecond) - - // - // Check that the data was received by the integration - // - resBody, err := io.ReadAll(httpResponse.Body) - require.NoError(t, err) - require.Equal(t, "ok", string(resBody[:])) - - require.Equal(t, http.StatusAccepted, httpResponse.StatusCode) - - // - // Check that the traces subsystem remote wrote the integration - // - select { - case <-time.After(10 * time.Second): - require.Fail(t, "failed to receive a span after 10 seconds") - case tr := <-tracesCh: - require.Equal(t, 1, tr.SpanCount()) - // Nothing to do, send succeeded. - } -} diff --git a/internal/static/integrations/v2/app_agent_receiver/handler.go b/internal/static/integrations/v2/app_agent_receiver/handler.go deleted file mode 100644 index c430e90993..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/handler.go +++ /dev/null @@ -1,126 +0,0 @@ -package app_agent_receiver - -import ( - "context" - "sync" - - "crypto/subtle" - "encoding/json" - "net/http" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - "github.com/rs/cors" - "golang.org/x/time/rate" -) - -const apiKeyHeader = "x-api-key" - -type AppAgentReceiverExporter interface { - Name() string - Export(ctx context.Context, payload Payload) error -} - -// AppAgentReceiverHandler struct controls the data ingestion http handler of the receiver -type AppAgentReceiverHandler struct { - exporters []AppAgentReceiverExporter - config *Config - rateLimiter *rate.Limiter - exporterErrorsCollector *prometheus.CounterVec -} - -// NewAppAgentReceiverHandler creates a new AppReceiver instance based on the given configuration -func NewAppAgentReceiverHandler(conf *Config, exporters []AppAgentReceiverExporter, reg prometheus.Registerer) AppAgentReceiverHandler { - var rateLimiter *rate.Limiter - if conf.Server.RateLimiting.Enabled { - var rps float64 - if conf.Server.RateLimiting.RPS > 0 { - rps = conf.Server.RateLimiting.RPS - } - - var b int - if conf.Server.RateLimiting.Burstiness > 0 { - b = conf.Server.RateLimiting.Burstiness - } - rateLimiter = rate.NewLimiter(rate.Limit(rps), b) - } - - exporterErrorsCollector := prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "app_agent_receiver_exporter_errors_total", - Help: "Total number of errors produced by a receiver exporter", - }, []string{"exporter"}) - - reg.MustRegister(exporterErrorsCollector) - - return AppAgentReceiverHandler{ - exporters: exporters, - config: conf, - rateLimiter: rateLimiter, - exporterErrorsCollector: exporterErrorsCollector, - } -} - -// HTTPHandler is the http.Handler for the receiver. It will do the following -// 0. Enable CORS for the configured hosts -// 1. Check if the request should be rate limited -// 2. Verify that the payload size is within limits -// 3. Start two go routines for exporters processing and exporting data respectively -// 4. 
Respond with 202 once all the work is done -func (ar *AppAgentReceiverHandler) HTTPHandler(logger log.Logger) http.Handler { - var handler http.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // Check rate limiting state - if ar.config.Server.RateLimiting.Enabled { - if ok := ar.rateLimiter.Allow(); !ok { - http.Error(w, http.StatusText(http.StatusTooManyRequests), http.StatusTooManyRequests) - return - } - } - - // check API key if one is provided - if len(ar.config.Server.APIKey) > 0 && subtle.ConstantTimeCompare([]byte(r.Header.Get(apiKeyHeader)), []byte(ar.config.Server.APIKey)) == 0 { - http.Error(w, "api key not provided or incorrect", http.StatusUnauthorized) - return - } - - // Verify content length. We trust net/http to give us the correct number - if ar.config.Server.MaxAllowedPayloadSize > 0 && r.ContentLength > ar.config.Server.MaxAllowedPayloadSize { - http.Error(w, http.StatusText(http.StatusRequestEntityTooLarge), http.StatusRequestEntityTooLarge) - return - } - - var p Payload - err := json.NewDecoder(r.Body).Decode(&p) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - var wg sync.WaitGroup - - for _, exporter := range ar.exporters { - wg.Add(1) - go func(exp AppAgentReceiverExporter) { - defer wg.Done() - if err := exp.Export(r.Context(), p); err != nil { - level.Error(logger).Log("msg", "exporter error", "exporter", exp.Name(), "error", err) - ar.exporterErrorsCollector.WithLabelValues(exp.Name()).Inc() - } - }(exporter) - } - - wg.Wait() - w.WriteHeader(http.StatusAccepted) - _, _ = w.Write([]byte("ok")) - }) - - if len(ar.config.Server.CORSAllowedOrigins) > 0 { - c := cors.New(cors.Options{ - AllowedOrigins: ar.config.Server.CORSAllowedOrigins, - AllowedHeaders: []string{apiKeyHeader, "content-type", "x-faro-session-id"}, - }) - handler = c.Handler(handler) - } - - return handler -} diff --git a/internal/static/integrations/v2/app_agent_receiver/handler_test.go b/internal/static/integrations/v2/app_agent_receiver/handler_test.go deleted file mode 100644 index ac0e5438c8..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/handler_test.go +++ /dev/null @@ -1,356 +0,0 @@ -package app_agent_receiver - -import ( - "bytes" - "context" - "errors" - "net/http" - "net/http/httptest" - "testing" - - "github.com/go-kit/log" - "github.com/stretchr/testify/require" - - "github.com/prometheus/client_golang/prometheus" -) - -const PAYLOAD = ` -{ - "traces": { - "resourceSpans": [] - }, - "logs": [], - "exceptions": [], - "measurements": [], - "meta": {} -} -` - -type TestExporter struct { - name string - broken bool - payloads []Payload -} - -func (te *TestExporter) Name() string { - return te.name -} - -func (te *TestExporter) Export(ctx context.Context, payload Payload) error { - if te.broken { - return errors.New("this exporter is broken") - } - te.payloads = append(te.payloads, payload) - return nil -} - -func TestMultipleExportersAllSucceed(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - reg := prometheus.NewRegistry() - - require.NoError(t, err) - - exporter1 := TestExporter{ - name: "exporter1", - broken: false, - payloads: []Payload{}, - } - exporter2 := TestExporter{ - name: "exporter2", - broken: false, - payloads: []Payload{}, - } - - conf := &Config{} - - fr := NewAppAgentReceiverHandler(conf, []AppAgentReceiverExporter{&exporter1, &exporter2}, reg) - handler := fr.HTTPHandler(log.NewNopLogger()) - - rr := httptest.NewRecorder() - - 
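// Both exporters should observe the payload exactly once.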
handler.ServeHTTP(rr, req) - - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) - - require.Len(t, exporter1.payloads, 1) - require.Len(t, exporter2.payloads, 1) -} - -func TestMultipleExportersOneFails(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - require.NoError(t, err) - - reg := prometheus.NewRegistry() - - exporter1 := TestExporter{ - name: "exporter1", - broken: true, - payloads: []Payload{}, - } - exporter2 := TestExporter{ - name: "exporter2", - broken: false, - payloads: []Payload{}, - } - - conf := &Config{} - - fr := NewAppAgentReceiverHandler(conf, []AppAgentReceiverExporter{&exporter1, &exporter2}, reg) - handler := fr.HTTPHandler(log.NewNopLogger()) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - - metrics, err := reg.Gather() - require.NoError(t, err) - - metric := metrics[0] - require.Equal(t, "app_agent_receiver_exporter_errors_total", *metric.Name) - require.Len(t, metric.Metric, 1) - require.Equal(t, 1.0, *metric.Metric[0].Counter.Value) - require.Len(t, metric.Metric[0].Label, 1) - require.Equal(t, *metric.Metric[0].Label[0].Value, "exporter1") - require.Len(t, metrics, 1) - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) - require.Len(t, exporter1.payloads, 0) - require.Len(t, exporter2.payloads, 1) -} - -func TestMultipleExportersAllFail(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - reg := prometheus.NewRegistry() - - require.NoError(t, err) - - exporter1 := TestExporter{ - name: "exporter1", - broken: true, - payloads: []Payload{}, - } - exporter2 := TestExporter{ - name: "exporter2", - broken: true, - payloads: []Payload{}, - } - - conf := &Config{} - - fr := NewAppAgentReceiverHandler(conf, []AppAgentReceiverExporter{&exporter1, &exporter2}, reg) - handler := fr.HTTPHandler(log.NewNopLogger()) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - - metrics, err := reg.Gather() - require.NoError(t, err) - - require.Len(t, metrics, 1) - metric := metrics[0] - - require.Equal(t, "app_agent_receiver_exporter_errors_total", *metric.Name) - require.Len(t, metric.Metric, 2) - require.Equal(t, 1.0, *metric.Metric[0].Counter.Value) - require.Equal(t, 1.0, *metric.Metric[1].Counter.Value) - require.Len(t, metric.Metric[0].Label, 1) - require.Len(t, metric.Metric[1].Label, 1) - require.Equal(t, *metric.Metric[0].Label[0].Value, "exporter1") - require.Equal(t, *metric.Metric[1].Label[0].Value, "exporter2") - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) - require.Len(t, exporter1.payloads, 0) - require.Len(t, exporter2.payloads, 0) -} - -func TestNoContentLengthLimitSet(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - require.NoError(t, err) - reg := prometheus.NewRegistry() - - conf := &Config{} - - req.ContentLength = 89348593894 - - fr := NewAppAgentReceiverHandler(conf, []AppAgentReceiverExporter{}, reg) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) -} - -func TestLargePayload(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - require.NoError(t, err) - reg := prometheus.NewRegistry() - - conf := &Config{ - Server: ServerConfig{ - MaxAllowedPayloadSize: 10, - }, - } - - fr := NewAppAgentReceiverHandler(conf, []AppAgentReceiverExporter{}, reg) - handler := 
fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, http.StatusRequestEntityTooLarge, rr.Result().StatusCode) -} - -func TestAPIKeyRequiredButNotProvided(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - if err != nil { - t.Fatal(err) - } - - conf := &Config{ - Server: ServerConfig{ - APIKey: "foo", - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, http.StatusUnauthorized, rr.Result().StatusCode) -} - -func TestAPIKeyWrong(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - req.Header.Set("x-api-key", "bar") - - if err != nil { - t.Fatal(err) - } - - conf := &Config{ - Server: ServerConfig{ - APIKey: "foo", - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, http.StatusUnauthorized, rr.Result().StatusCode) -} - -func TestAPIKeyCorrect(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - req.Header.Set("x-api-key", "foo") - - if err != nil { - t.Fatal(err) - } - - conf := &Config{ - Server: ServerConfig{ - APIKey: "foo", - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) -} - -func TestRateLimiterNoReject(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - if err != nil { - t.Fatal(err) - } - - conf := &Config{ - Server: ServerConfig{ - RateLimiting: RateLimitingConfig{ - Burstiness: 10, - RPS: 10, - Enabled: true, - }, - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) -} - -func TestRateLimiterReject(t *testing.T) { - conf := &Config{ - Server: ServerConfig{ - RateLimiting: RateLimitingConfig{ - Burstiness: 2, - RPS: 1, - Enabled: true, - }, - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - makeRequest := func() *httptest.ResponseRecorder { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - require.NoError(t, err) - rr := httptest.NewRecorder() - handler.ServeHTTP(rr, req) - return rr - } - - r1 := makeRequest() - r2 := makeRequest() - r3 := makeRequest() - - require.Equal(t, http.StatusAccepted, r1.Result().StatusCode) - require.Equal(t, http.StatusAccepted, r2.Result().StatusCode) - require.Equal(t, http.StatusTooManyRequests, r3.Result().StatusCode) -} - -func TestRateLimiterDisabled(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - if err != nil { - t.Fatal(err) - } - - conf := &Config{ - Server: ServerConfig{ - RateLimiting: RateLimitingConfig{ - Burstiness: 0, - RPS: 0, - Enabled: false, - }, - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, 
http.StatusAccepted, rr.Result().StatusCode) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/logs_exporter.go b/internal/static/integrations/v2/app_agent_receiver/logs_exporter.go deleted file mode 100644 index 31295a5060..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/logs_exporter.go +++ /dev/null @@ -1,140 +0,0 @@ -package app_agent_receiver - -import ( - "context" - "fmt" - "time" - - kitlog "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/go-logfmt/logfmt" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" - prommodel "github.com/prometheus/common/model" -) - -// logsInstance is an interface with capability to send log entries -type logsInstance interface { - SendEntry(entry api.Entry, dur time.Duration) bool -} - -// logsInstanceGetter is a function that returns a LogsInstance to send log entries to -type logsInstanceGetter func() (logsInstance, error) - -// LogsExporterConfig holds the configuration of the logs exporter -type LogsExporterConfig struct { - SendEntryTimeout time.Duration - GetLogsInstance logsInstanceGetter - Labels map[string]string -} - -// LogsExporter will send logs & errors to loki -type LogsExporter struct { - getLogsInstance logsInstanceGetter - sendEntryTimeout time.Duration - logger kitlog.Logger - labels map[string]string - sourceMapStore SourceMapStore -} - -// NewLogsExporter creates a new logs exporter with the given -// configuration -func NewLogsExporter(logger kitlog.Logger, conf LogsExporterConfig, sourceMapStore SourceMapStore) AppAgentReceiverExporter { - return &LogsExporter{ - logger: logger, - getLogsInstance: conf.GetLogsInstance, - sendEntryTimeout: conf.SendEntryTimeout, - labels: conf.Labels, - sourceMapStore: sourceMapStore, - } -} - -// Name of the exporter, for logging purposes -func (le *LogsExporter) Name() string { - return "logs exporter" -} - -// Export implements the AppDataExporter interface -func (le *LogsExporter) Export(ctx context.Context, payload Payload) error { - meta := payload.Meta.KeyVal() - - var err error - - // log events - for _, logItem := range payload.Logs { - kv := logItem.KeyVal() - MergeKeyVal(kv, meta) - err = le.sendKeyValsToLogsPipeline(kv) - } - - // exceptions - for _, exception := range payload.Exceptions { - transformedException := TransformException(le.sourceMapStore, le.logger, &exception, payload.Meta.App.Release) - kv := transformedException.KeyVal() - MergeKeyVal(kv, meta) - err = le.sendKeyValsToLogsPipeline(kv) - } - - // measurements - for _, measurement := range payload.Measurements { - kv := measurement.KeyVal() - MergeKeyVal(kv, meta) - err = le.sendKeyValsToLogsPipeline(kv) - } - - // events - for _, event := range payload.Events { - kv := event.KeyVal() - MergeKeyVal(kv, meta) - err = le.sendKeyValsToLogsPipeline(kv) - } - - return err -} - -func (le *LogsExporter) sendKeyValsToLogsPipeline(kv *KeyVal) error { - line, err := logfmt.MarshalKeyvals(KeyValToInterfaceSlice(kv)...) 
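// (MarshalKeyvals renders the flattened payload as one logfmt line; the
// expected lines in logs_exporter_test.go below show the exact shape.)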
- if err != nil { - level.Error(le.logger).Log("msg", "failed to logfmt a frontend log event", "err", err) - return err - } - instance, err := le.getLogsInstance() - if err != nil { - return err - } - sent := instance.SendEntry(api.Entry{ - Labels: le.labelSet(kv), - Entry: logproto.Entry{ - Timestamp: time.Now(), - Line: string(line), - }, - }, le.sendEntryTimeout) - if !sent { - level.Warn(le.logger).Log("msg", "failed to log frontend log event to logs pipeline") - return fmt.Errorf("failed to send app event to logs pipeline") - } - return nil -} - -func (le *LogsExporter) labelSet(kv *KeyVal) prommodel.LabelSet { - set := make(prommodel.LabelSet, len(le.labels)) - - for k, v := range le.labels { - if len(v) > 0 { - set[prommodel.LabelName(k)] = prommodel.LabelValue(v) - } else { - if val, ok := kv.Get(k); ok { - set[prommodel.LabelName(k)] = prommodel.LabelValue(fmt.Sprint(val)) - } - } - } - - return set -} - -// Static typecheck tests -var ( - _ AppAgentReceiverExporter = (*LogsExporter)(nil) - _ logsInstance = (*logs.Instance)(nil) -) diff --git a/internal/static/integrations/v2/app_agent_receiver/logs_exporter_test.go b/internal/static/integrations/v2/app_agent_receiver/logs_exporter_test.go deleted file mode 100644 index 784e2c85bf..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/logs_exporter_test.go +++ /dev/null @@ -1,120 +0,0 @@ -package app_agent_receiver - -import ( - "context" - "encoding/json" - "os" - "testing" - "time" - - kitlog "github.com/go-kit/log" - "github.com/grafana/loki/clients/pkg/promtail/api" - prommodel "github.com/prometheus/common/model" - - "github.com/stretchr/testify/require" -) - -func loadTestPayload(t *testing.T) Payload { - t.Helper() - // Safe to disable, this is a test. - // nolint:gosec - content, err := os.ReadFile("./testdata/payload.json") - require.NoError(t, err, "expected to be able to read file") - require.True(t, len(content) > 0) - var payload Payload - err = json.Unmarshal(content, &payload) - require.NoError(t, err) - return payload -} - -type testLogsInstance struct { - Entries []api.Entry -} - -func (i *testLogsInstance) SendEntry(entry api.Entry, dur time.Duration) bool { - i.Entries = append(i.Entries, entry) - return true -} - -type MockSourceMapStore struct{} - -func (store *MockSourceMapStore) GetSourceMap(sourceURL string, release string) (*SourceMap, error) { - return nil, nil -} - -func TestExportLogs(t *testing.T) { - ctx := context.Background() - inst := &testLogsInstance{ - Entries: []api.Entry{}, - } - - logger := kitlog.NewNopLogger() - - logsExporter := NewLogsExporter( - logger, - LogsExporterConfig{ - GetLogsInstance: func() (logsInstance, error) { return inst, nil }, - Labels: map[string]string{ - "app": "frontend", - "kind": "", - }, - SendEntryTimeout: 100, - }, - &MockSourceMapStore{}, - ) - - payload := loadTestPayload(t) - - err := logsExporter.Export(ctx, payload) - require.NoError(t, err) - - require.Len(t, inst.Entries, 6) - - // log1 - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("log"), - }, inst.Entries[0].Labels) - expectedLine := "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=log message=\"opened pricing page\" level=info context_component=AppRoot context_page=Pricing traceID=abcd spanID=def sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 
user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[0].Line) - - // log2 - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("log"), - }, inst.Entries[1].Labels) - expectedLine = "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=log message=\"loading price list\" level=trace context_component=AppRoot context_page=Pricing traceID=abcd spanID=ghj sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[1].Line) - - // exception - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("exception"), - }, inst.Entries[2].Labels) - expectedLine = "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=exception type=Error value=\"Cannot read property 'find' of undefined\" stacktrace=\"Error: Cannot read property 'find' of undefined\\n at ? (http://fe:3002/static/js/vendors~main.chunk.js:8639:42)\\n at dispatchAction (http://fe:3002/static/js/vendors~main.chunk.js:268095:9)\\n at scheduleUpdateOnFiber (http://fe:3002/static/js/vendors~main.chunk.js:273726:13)\\n at flushSyncCallbackQueue (http://fe:3002/static/js/vendors~main.chunk.js:263362:7)\\n at flushSyncCallbackQueueImpl (http://fe:3002/static/js/vendors~main.chunk.js:263374:13)\\n at runWithPriority$1 (http://fe:3002/static/js/vendors~main.chunk.js:263325:14)\\n at unstable_runWithPriority (http://fe:3002/static/js/vendors~main.chunk.js:291265:16)\\n at ? (http://fe:3002/static/js/vendors~main.chunk.js:263379:30)\\n at performSyncWorkOnRoot (http://fe:3002/static/js/vendors~main.chunk.js:274126:22)\\n at renderRootSync (http://fe:3002/static/js/vendors~main.chunk.js:274509:11)\\n at workLoopSync (http://fe:3002/static/js/vendors~main.chunk.js:274543:9)\\n at performUnitOfWork (http://fe:3002/static/js/vendors~main.chunk.js:274606:16)\\n at beginWork$1 (http://fe:3002/static/js/vendors~main.chunk.js:275746:18)\\n at beginWork (http://fe:3002/static/js/vendors~main.chunk.js:270944:20)\\n at updateFunctionComponent (http://fe:3002/static/js/vendors~main.chunk.js:269291:24)\\n at renderWithHooks (http://fe:3002/static/js/vendors~main.chunk.js:266969:22)\\n at ? (http://fe:3002/static/js/main.chunk.js:2600:74)\\n at useGetBooksQuery (http://fe:3002/static/js/main.chunk.js:1299:65)\\n at Module.useQuery (http://fe:3002/static/js/vendors~main.chunk.js:8495:85)\\n at useBaseQuery (http://fe:3002/static/js/vendors~main.chunk.js:8656:83)\\n at useDeepMemo (http://fe:3002/static/js/vendors~main.chunk.js:8696:14)\\n at ? 
(http://fe:3002/static/js/vendors~main.chunk.js:8657:55)\\n at QueryData.execute (http://fe:3002/static/js/vendors~main.chunk.js:7883:47)\\n at QueryData.getExecuteResult (http://fe:3002/static/js/vendors~main.chunk.js:7944:23)\\n at QueryData._this.getQueryResult (http://fe:3002/static/js/vendors~main.chunk.js:7790:19)\\n at new ApolloError (http://fe:3002/static/js/vendors~main.chunk.js:5164:24)\" hash=2735541995122471342 sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[2].Line) - - // measurement - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("measurement"), - }, inst.Entries[3].Labels) - expectedLine = "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=measurement type=foobar ttfb=14.000000 ttfcp=22.120000 ttfp=20.120000 traceID=abcd spanID=def context_hello=world sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[3].Line) - - // event 1 - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("event"), - }, inst.Entries[4].Labels) - expectedLine = "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=event event_name=click_login_button event_domain=frontend event_data_foo=bar event_data_one=two traceID=abcd spanID=def sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[4].Line) - - // event 2 - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("event"), - }, inst.Entries[5].Labels) - expectedLine = "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=event event_name=click_reset_password_button sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[5].Line) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/payload.go b/internal/static/integrations/v2/app_agent_receiver/payload.go deleted file mode 100644 index 
ca91a8842d..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/payload.go +++ /dev/null @@ -1,420 +0,0 @@ -package app_agent_receiver - -import ( - "fmt" - "sort" - "strconv" - "strings" - "time" - - "go.opentelemetry.io/collector/pdata/pcommon" - "go.opentelemetry.io/collector/pdata/ptrace" - - "github.com/zeebo/xxh3" -) - -// Payload is the body of the receiver request -type Payload struct { - Exceptions []Exception `json:"exceptions,omitempty"` - Logs []Log `json:"logs,omitempty"` - Measurements []Measurement `json:"measurements,omitempty"` - Events []Event `json:"events,omitempty"` - Meta Meta `json:"meta,omitempty"` - Traces *Traces `json:"traces,omitempty"` -} - -// Frame struct represents a single stacktrace frame -type Frame struct { - Function string `json:"function,omitempty"` - Module string `json:"module,omitempty"` - Filename string `json:"filename,omitempty"` - Lineno int `json:"lineno,omitempty"` - Colno int `json:"colno,omitempty"` -} - -// String function converts a Frame into a human-readable string -func (frame Frame) String() string { - module := "" - if len(frame.Module) > 0 { - module = frame.Module + "|" - } - return fmt.Sprintf("\n at %s (%s%s:%v:%v)", frame.Function, module, frame.Filename, frame.Lineno, frame.Colno) -} - -// Stacktrace is a collection of Frames -type Stacktrace struct { - Frames []Frame `json:"frames,omitempty"` -} - -// Exception struct controls all the data regarding an exception -type Exception struct { - Type string `json:"type,omitempty"` - Value string `json:"value,omitempty"` - Stacktrace *Stacktrace `json:"stacktrace,omitempty"` - Timestamp time.Time `json:"timestamp"` - Trace TraceContext `json:"trace,omitempty"` - Context ExceptionContext `json:"context,omitempty"` -} - -// Message is the concatenation of the Exception.Type and Exception.Value -func (e Exception) Message() string { - return fmt.Sprintf("%s: %s", e.Type, e.Value) -} - -// String is the string representation of an Exception -func (e Exception) String() string { - var stacktrace = e.Message() - if e.Stacktrace != nil { - for _, frame := range e.Stacktrace.Frames { - stacktrace += frame.String() - } - } - return stacktrace -} - -// KeyVal representation of the exception object -func (e Exception) KeyVal() *KeyVal { - kv := NewKeyVal() - KeyValAdd(kv, "timestamp", e.Timestamp.String()) - KeyValAdd(kv, "kind", "exception") - KeyValAdd(kv, "type", e.Type) - KeyValAdd(kv, "value", e.Value) - KeyValAdd(kv, "stacktrace", e.String()) - KeyValAdd(kv, "hash", strconv.FormatUint(xxh3.HashString(e.Value), 10)) - MergeKeyValWithPrefix(kv, KeyValFromMap(e.Context), "context_") - MergeKeyVal(kv, e.Trace.KeyVal()) - return kv -} - -// ExceptionContext is a string to string map structure that -// represents the context of an exception -type ExceptionContext map[string]string - -// TraceContext holds trace id and span id associated with an entity (log, exception, measurement...). -type TraceContext struct { - TraceID string `json:"trace_id"` - SpanID string `json:"span_id"` -} - -// KeyVal representation of the trace context object. -func (tc TraceContext) KeyVal() *KeyVal { - retv := NewKeyVal() - KeyValAdd(retv, "traceID", tc.TraceID) - KeyValAdd(retv, "spanID", tc.SpanID) - return retv -} - -// Traces wraps the otel traces model. -type Traces struct { - ptrace.Traces -} - -// UnmarshalJSON unmarshals Traces model.
-func (t *Traces) UnmarshalJSON(b []byte) error { - unmarshaler := &ptrace.JSONUnmarshaler{} - td, err := unmarshaler.UnmarshalTraces(b) - if err != nil { - return err - } - *t = Traces{td} - return nil -} - -// MarshalJSON marshals Traces model to json. -func (t Traces) MarshalJSON() ([]byte, error) { - marshaler := &ptrace.JSONMarshaler{} - return marshaler.MarshalTraces(t.Traces) -} - -// SpanSlice unpacks Traces entity into a slice of Spans. -func (t Traces) SpanSlice() []ptrace.Span { - spans := make([]ptrace.Span, 0) - rss := t.ResourceSpans() - for i := 0; i < rss.Len(); i++ { - rs := rss.At(i) - ilss := rs.ScopeSpans() - for j := 0; j < ilss.Len(); j++ { - s := ilss.At(j).Spans() - for si := 0; si < s.Len(); si++ { - spans = append(spans, s.At(si)) - } - } - } - return spans -} - -// SpanToKeyVal returns KeyVal representation of a Span. -func SpanToKeyVal(s ptrace.Span) *KeyVal { - kv := NewKeyVal() - if s.StartTimestamp() > 0 { - KeyValAdd(kv, "timestamp", s.StartTimestamp().AsTime().String()) - } - if s.EndTimestamp() > 0 { - KeyValAdd(kv, "end_timestamp", s.EndTimestamp().AsTime().String()) - } - KeyValAdd(kv, "kind", "span") - KeyValAdd(kv, "traceID", s.TraceID().String()) - KeyValAdd(kv, "spanID", s.SpanID().String()) - KeyValAdd(kv, "span_kind", s.Kind().String()) - KeyValAdd(kv, "name", s.Name()) - KeyValAdd(kv, "parent_spanID", s.ParentSpanID().String()) - s.Attributes().Range(func(k string, v pcommon.Value) bool { - KeyValAdd(kv, "attr_"+k, fmt.Sprintf("%v", v)) - return true - }) - - return kv -} - -// LogLevel is log level enum for incoming app logs -type LogLevel string - -const ( - // LogLevelTrace is "trace" - LogLevelTrace LogLevel = "trace" - // LogLevelDebug is "debug" - LogLevelDebug LogLevel = "debug" - // LogLevelInfo is "info" - LogLevelInfo LogLevel = "info" - // LogLevelWarning is "warning" - LogLevelWarning LogLevel = "warning" - // LogLevelError is "error" - LogLevelError LogLevel = "error" -) - -// LogContext is a string to string map structure that -// represents the context of a log message -type LogContext map[string]string - -// Log struct controls the data that comes into a Log message -type Log struct { - Message string `json:"message,omitempty"` - LogLevel LogLevel `json:"level,omitempty"` - Context LogContext `json:"context,omitempty"` - Timestamp time.Time `json:"timestamp"` - Trace TraceContext `json:"trace,omitempty"` -} - -// KeyVal representation of a Log object -func (l Log) KeyVal() *KeyVal { - kv := NewKeyVal() - KeyValAdd(kv, "timestamp", l.Timestamp.String()) - KeyValAdd(kv, "kind", "log") - KeyValAdd(kv, "message", l.Message) - KeyValAdd(kv, "level", string(l.LogLevel)) - MergeKeyValWithPrefix(kv, KeyValFromMap(l.Context), "context_") - MergeKeyVal(kv, l.Trace.KeyVal()) - return kv -} - -// MeasurementContext is a string to string map structure that -// represents the context of a measurement -type MeasurementContext map[string]string - -// Measurement holds the data for user-provided measurements -type Measurement struct { - Type string `json:"type,omitempty"` - Values map[string]float64 `json:"values,omitempty"` - Timestamp time.Time `json:"timestamp,omitempty"` - Trace TraceContext `json:"trace,omitempty"` - Context MeasurementContext `json:"context,omitempty"` -} - -// KeyVal representation of the measurement object -func (m Measurement) KeyVal() *KeyVal { - kv := NewKeyVal() - - KeyValAdd(kv, "timestamp", m.Timestamp.String()) - KeyValAdd(kv, "kind", "measurement") - KeyValAdd(kv, "type", m.Type) - - keys := make([]string, 0, 
len(m.Values)) - for k := range m.Values { - keys = append(keys, k) - } - sort.Strings(keys) - for _, k := range keys { - KeyValAdd(kv, k, fmt.Sprintf("%f", m.Values[k])) - } - MergeKeyVal(kv, m.Trace.KeyVal()) - MergeKeyValWithPrefix(kv, KeyValFromMap(m.Context), "context_") - return kv -} - -// SDK holds metadata about the app agent that produced the event -type SDK struct { - Name string `json:"name,omitempty"` - Version string `json:"version,omitempty"` - Integrations []SDKIntegration `json:"integrations,omitempty"` -} - -// KeyVal produces key->value representation of SDK metadata -func (sdk SDK) KeyVal() *KeyVal { - kv := NewKeyVal() - KeyValAdd(kv, "name", sdk.Name) - KeyValAdd(kv, "version", sdk.Version) - - if len(sdk.Integrations) > 0 { - integrations := make([]string, len(sdk.Integrations)) - - for i, integration := range sdk.Integrations { - integrations[i] = integration.String() - } - - KeyValAdd(kv, "integrations", strings.Join(integrations, ",")) - } - - return kv -} - -// SDKIntegration holds metadata about a plugin/integration on the app agent that collected and sent the event -type SDKIntegration struct { - Name string `json:"name,omitempty"` - Version string `json:"version,omitempty"` -} - -func (i SDKIntegration) String() string { - return fmt.Sprintf("%s:%s", i.Name, i.Version) -} - -// User holds metadata about the user related to an app event -type User struct { - Email string `json:"email,omitempty"` - ID string `json:"id,omitempty"` - Username string `json:"username,omitempty"` - Attributes map[string]string `json:"attributes,omitempty"` -} - -// KeyVal produces a key->value representation of User metadata -func (u User) KeyVal() *KeyVal { - kv := NewKeyVal() - KeyValAdd(kv, "email", u.Email) - KeyValAdd(kv, "id", u.ID) - KeyValAdd(kv, "username", u.Username) - MergeKeyValWithPrefix(kv, KeyValFromMap(u.Attributes), "attr_") - return kv -} - -// Meta holds metadata about an app event -type Meta struct { - SDK SDK `json:"sdk,omitempty"` - App App `json:"app,omitempty"` - User User `json:"user,omitempty"` - Session Session `json:"session,omitempty"` - Page Page `json:"page,omitempty"` - Browser Browser `json:"browser,omitempty"` - View View `json:"view,omitempty"` -} - -// KeyVal produces key->value representation of the app event metadata -func (m Meta) KeyVal() *KeyVal { - kv := NewKeyVal() - MergeKeyValWithPrefix(kv, m.SDK.KeyVal(), "sdk_") - MergeKeyValWithPrefix(kv, m.App.KeyVal(), "app_") - MergeKeyValWithPrefix(kv, m.User.KeyVal(), "user_") - MergeKeyValWithPrefix(kv, m.Session.KeyVal(), "session_") - MergeKeyValWithPrefix(kv, m.Page.KeyVal(), "page_") - MergeKeyValWithPrefix(kv, m.Browser.KeyVal(), "browser_") - MergeKeyValWithPrefix(kv, m.View.KeyVal(), "view_") - return kv -} - -// Session holds metadata about the browser session the event originates from -type Session struct { - ID string `json:"id,omitempty"` - Attributes map[string]string `json:"attributes,omitempty"` -} - -// KeyVal produces key->value representation of the Session metadata -func (s Session) KeyVal() *KeyVal { - kv := NewKeyVal() - KeyValAdd(kv, "id", s.ID) - MergeKeyValWithPrefix(kv, KeyValFromMap(s.Attributes), "attr_") - return kv -} - -// Page holds metadata about the web page the event originates from -type Page struct { - ID string `json:"id,omitempty"` - URL string `json:"url,omitempty"` - Attributes map[string]string `json:"attributes,omitempty"` -} - -// KeyVal produces key->value representation of Page metadata -func (p Page) KeyVal() *KeyVal { - kv := NewKeyVal() - KeyValAdd(kv, "id", 
p.ID) - KeyValAdd(kv, "url", p.URL) - MergeKeyValWithPrefix(kv, KeyValFromMap(p.Attributes), "attr_") - return kv -} - -// App holds metadata about the application the event originates from -type App struct { - Name string `json:"name,omitempty"` - Release string `json:"release,omitempty"` - Version string `json:"version,omitempty"` - Environment string `json:"environment,omitempty"` -} - -// Event holds RUM event data -type Event struct { - Name string `json:"name"` - Domain string `json:"domain,omitempty"` - Attributes map[string]string `json:"attributes,omitempty"` - Timestamp time.Time `json:"timestamp,omitempty"` - Trace TraceContext `json:"trace,omitempty"` -} - -// KeyVal produces key->value representation of Event metadata -func (e Event) KeyVal() *KeyVal { - kv := NewKeyVal() - KeyValAdd(kv, "timestamp", e.Timestamp.String()) - KeyValAdd(kv, "kind", "event") - KeyValAdd(kv, "event_name", e.Name) - KeyValAdd(kv, "event_domain", e.Domain) - if e.Attributes != nil { - MergeKeyValWithPrefix(kv, KeyValFromMap(e.Attributes), "event_data_") - } - MergeKeyVal(kv, e.Trace.KeyVal()) - return kv -} - -// KeyVal produces key->value representation of App metadata -func (a App) KeyVal() *KeyVal { - kv := NewKeyVal() - KeyValAdd(kv, "name", a.Name) - KeyValAdd(kv, "release", a.Release) - KeyValAdd(kv, "version", a.Version) - KeyValAdd(kv, "environment", a.Environment) - return kv -} - -// Browser holds metadata about a client's browser -type Browser struct { - Name string `json:"name,omitempty"` - Version string `json:"version,omitempty"` - OS string `json:"os,omitempty"` - Mobile bool `json:"mobile,omitempty"` -} - -// KeyVal produces key->value representation of the Browser metadata -func (b Browser) KeyVal() *KeyVal { - kv := NewKeyVal() - KeyValAdd(kv, "name", b.Name) - KeyValAdd(kv, "version", b.Version) - KeyValAdd(kv, "os", b.OS) - KeyValAdd(kv, "mobile", fmt.Sprintf("%v", b.Mobile)) - return kv -} - -// View holds metadata about a view -type View struct { - Name string `json:"name,omitempty"` -} - -func (v View) KeyVal() *KeyVal { - kv := NewKeyVal() - KeyValAdd(kv, "name", v.Name) - return kv -} diff --git a/internal/static/integrations/v2/app_agent_receiver/payload_test.go b/internal/static/integrations/v2/app_agent_receiver/payload_test.go deleted file mode 100644 index b66792547a..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/payload_test.go +++ /dev/null @@ -1,142 +0,0 @@ -package app_agent_receiver - -import ( - "encoding/json" - "os" - "path/filepath" - "testing" - "time" - - "github.com/stretchr/testify/require" -) - -func loadTestData(t *testing.T, file string) []byte { - t.Helper() - // Safe to disable, this is a test.
- // nolint:gosec - content, err := os.ReadFile(filepath.Join("testdata", file)) - require.NoError(t, err, "expected to be able to read file") - require.True(t, len(content) > 0) - return content -} - -func TestUnmarshalPayloadJSON(t *testing.T) { - content := loadTestData(t, "payload.json") - var payload Payload - err := json.Unmarshal(content, &payload) - require.NoError(t, err) - - now, err := time.Parse("2006-01-02T15:04:05Z0700", "2021-09-30T10:46:17.680Z") - require.NoError(t, err) - - require.Equal(t, Meta{ - SDK: SDK{ - Name: "grafana-frontend-agent", - Version: "1.0.0", - }, - App: App{ - Name: "testapp", - Release: "0.8.2", - Version: "abcdefg", - Environment: "production", - }, - User: User{ - Username: "domasx2", - ID: "123", - Email: "geralt@kaermorhen.org", - Attributes: map[string]string{"foo": "bar"}, - }, - Session: Session{ - ID: "abcd", - Attributes: map[string]string{"time_elapsed": "100s"}, - }, - Page: Page{ - URL: "https://example.com/page", - }, - Browser: Browser{ - Name: "chrome", - Version: "88.12.1", - OS: "linux", - Mobile: false, - }, - View: View{ - Name: "foobar", - }, - }, payload.Meta) - - require.Len(t, payload.Exceptions, 1) - require.Len(t, payload.Exceptions[0].Stacktrace.Frames, 26) - require.Equal(t, "Error", payload.Exceptions[0].Type) - require.Equal(t, "Cannot read property 'find' of undefined", payload.Exceptions[0].Value) - require.EqualValues(t, ExceptionContext{"ReactError": "Annoying Error", "component": "ReactErrorBoundary"}, payload.Exceptions[0].Context) - - require.Equal(t, []Log{ - { - Message: "opened pricing page", - LogLevel: LogLevelInfo, - Context: map[string]string{ - "component": "AppRoot", - "page": "Pricing", - }, - Timestamp: now, - Trace: TraceContext{ - TraceID: "abcd", - SpanID: "def", - }, - }, - { - Message: "loading price list", - LogLevel: LogLevelTrace, - Context: map[string]string{ - "component": "AppRoot", - "page": "Pricing", - }, - Timestamp: now, - Trace: TraceContext{ - TraceID: "abcd", - SpanID: "ghj", - }, - }, - }, payload.Logs) - - require.Equal(t, []Event{ - { - Name: "click_login_button", - Domain: "frontend", - Timestamp: now, - Attributes: map[string]string{ - "foo": "bar", - "one": "two", - }, - Trace: TraceContext{ - TraceID: "abcd", - SpanID: "def", - }, - }, - { - Name: "click_reset_password_button", - Timestamp: now, - }, - }, payload.Events) - - require.Len(t, payload.Measurements, 1) - - require.Equal(t, []Measurement{ - { - Type: "foobar", - Values: map[string]float64{ - "ttfp": 20.12, - "ttfcp": 22.12, - "ttfb": 14, - }, - Timestamp: now, - Trace: TraceContext{ - TraceID: "abcd", - SpanID: "def", - }, - Context: MeasurementContext{ - "hello": "world", - }, - }, - }, payload.Measurements) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_exporter.go b/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_exporter.go deleted file mode 100644 index ea74c97fdf..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_exporter.go +++ /dev/null @@ -1,61 +0,0 @@ -package app_agent_receiver - -import ( - "context" - - "github.com/prometheus/client_golang/prometheus" -) - -// ReceiverMetricsExporter is an app agent receiver exporter that will capture metrics -// about counts of logs, exceptions, measurements, traces being ingested -type ReceiverMetricsExporter struct { - totalLogs prometheus.Counter - totalMeasurements prometheus.Counter - totalExceptions prometheus.Counter - totalEvents prometheus.Counter -} - -// 
NewReceiverMetricsExporter creates a new ReceiverMetricsExporter -func NewReceiverMetricsExporter(reg prometheus.Registerer) AppAgentReceiverExporter { - exp := &ReceiverMetricsExporter{ - totalLogs: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "app_agent_receiver_logs_total", - Help: "Total number of ingested logs", - }), - totalMeasurements: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "app_agent_receiver_measurements_total", - Help: "Total number of ingested measurements", - }), - totalExceptions: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "app_agent_receiver_exceptions_total", - Help: "Total number of ingested exceptions", - }), - totalEvents: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "app_agent_receiver_events_total", - Help: "Total number of ingested events", - }), - } - - reg.MustRegister(exp.totalLogs, exp.totalExceptions, exp.totalMeasurements, exp.totalEvents) - - return exp -} - -// Name of the exporter, for logging purposes -func (re *ReceiverMetricsExporter) Name() string { - return "receiver metrics exporter" -} - -// Export implements the AppDataExporter interface -func (re *ReceiverMetricsExporter) Export(ctx context.Context, payload Payload) error { - re.totalExceptions.Add(float64(len(payload.Exceptions))) - re.totalLogs.Add(float64(len(payload.Logs))) - re.totalMeasurements.Add(float64(len(payload.Measurements))) - re.totalEvents.Add(float64(len(payload.Events))) - return nil -} - -// Static typecheck tests -var ( - _ AppAgentReceiverExporter = (*ReceiverMetricsExporter)(nil) -) diff --git a/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_test.go b/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_test.go deleted file mode 100644 index 5fde03caad..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_test.go +++ /dev/null @@ -1,141 +0,0 @@ -package app_agent_receiver - -import ( - "context" - "fmt" - "testing" - - "github.com/prometheus/client_golang/prometheus" - - "github.com/stretchr/testify/require" -) - -type metricAssertion struct { - name string - value float64 -} - -func testcase(t *testing.T, payload Payload, assertions []metricAssertion) { - ctx := context.Background() - - reg := prometheus.NewRegistry() - - exporter := NewReceiverMetricsExporter(reg) - - err := exporter.Export(ctx, payload) - require.NoError(t, err) - - metrics, err := reg.Gather() - require.NoError(t, err) - - for _, assertion := range assertions { - found := false - for _, metric := range metrics { - if *metric.Name == assertion.name { - found = true - require.Len(t, metric.Metric, 1) - val := metric.Metric[0].Counter.Value - require.Equal(t, assertion.value, *val) - break - } - } - if !found { - require.Fail(t, fmt.Sprintf("metric [%s] not found", assertion.name)) - } - } -} - -func TestReceiverMetricsExport(t *testing.T) { - var payload Payload - payload.Logs = make([]Log, 2) - payload.Measurements = make([]Measurement, 3) - payload.Exceptions = make([]Exception, 4) - payload.Events = make([]Event, 5) - testcase(t, payload, []metricAssertion{ - { - name: "app_agent_receiver_logs_total", - value: 2, - }, - { - name: "app_agent_receiver_measurements_total", - value: 3, - }, - { - name: "app_agent_receiver_exceptions_total", - value: 4, - }, - { - name: "app_agent_receiver_events_total", - value: 5, - }, - }) -} - -func TestReceiverMetricsExportLogsOnly(t *testing.T) { - var payload Payload - payload.Logs = []Log{ - {}, - {}, - } - testcase(t, payload, []metricAssertion{ - { - name: 
"app_agent_receiver_logs_total", - value: 2, - }, - { - name: "app_agent_receiver_measurements_total", - value: 0, - }, - { - name: "app_agent_receiver_exceptions_total", - value: 0, - }, - }) -} - -func TestReceiverMetricsExportExceptionsOnly(t *testing.T) { - var payload Payload - payload.Exceptions = []Exception{ - {}, - {}, - {}, - {}, - } - testcase(t, payload, []metricAssertion{ - { - name: "app_agent_receiver_logs_total", - value: 0, - }, - { - name: "app_agent_receiver_measurements_total", - value: 0, - }, - { - name: "app_agent_receiver_exceptions_total", - value: 4, - }, - }) -} - -func TestReceiverMetricsExportMeasurementsOnly(t *testing.T) { - var payload Payload - payload.Measurements = []Measurement{ - {}, - {}, - {}, - } - testcase(t, payload, []metricAssertion{ - { - name: "app_agent_receiver_logs_total", - value: 0, - }, - { - name: "app_agent_receiver_measurements_total", - value: 3, - }, - { - name: "app_agent_receiver_exceptions_total", - value: 0, - }, - }) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/sourcemaps.go b/internal/static/integrations/v2/app_agent_receiver/sourcemaps.go deleted file mode 100644 index fe8935dd0b..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/sourcemaps.go +++ /dev/null @@ -1,357 +0,0 @@ -package app_agent_receiver - -import ( - "bytes" - "fmt" - "io" - "io/fs" - "net/http" - "net/url" - "os" - "path/filepath" - "regexp" - "strings" - "sync" - "text/template" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/go-sourcemap/sourcemap" - "github.com/prometheus/client_golang/prometheus" - "github.com/vincent-petithory/dataurl" -) - -// SourceMapStore is interface for a sourcemap service capable of transforming -// minified source locations to original source location -type SourceMapStore interface { - GetSourceMap(sourceURL string, release string) (*SourceMap, error) -} - -type httpClient interface { - Get(url string) (resp *http.Response, err error) -} - -// FileService is interface for a service that can be used to load source maps -// from file system -type fileService interface { - Stat(name string) (fs.FileInfo, error) - ReadFile(name string) ([]byte, error) -} - -type osFileService struct{} - -func (s *osFileService) Stat(name string) (fs.FileInfo, error) { - return os.Stat(name) -} - -func (s *osFileService) ReadFile(name string) ([]byte, error) { - return os.ReadFile(name) -} - -var reSourceMap = "//[#@]\\s(source(?:Mapping)?URL)=\\s*(?P\\S+)\r?\n?$" - -// SourceMap is a wrapper for go-sourcemap consumer -type SourceMap struct { - consumer *sourcemap.Consumer -} - -type sourceMapMetrics struct { - cacheSize *prometheus.CounterVec - downloads *prometheus.CounterVec - fileReads *prometheus.CounterVec -} - -type sourcemapFileLocation struct { - SourceMapFileLocation - pathTemplate *template.Template -} - -// RealSourceMapStore is an implementation of SourceMapStore -// that can download source maps or read them from file system -type RealSourceMapStore struct { - sync.Mutex - l log.Logger - httpClient httpClient - fileService fileService - config SourceMapConfig - cache map[string]*SourceMap - fileLocations []*sourcemapFileLocation - metrics *sourceMapMetrics -} - -// NewSourceMapStore creates an instance of SourceMapStore. 
-// httpClient and fileService will be instantiated to defaults if nil is provided -func NewSourceMapStore(l log.Logger, config SourceMapConfig, reg prometheus.Registerer, httpClient httpClient, fileService fileService) SourceMapStore { - if httpClient == nil { - httpClient = &http.Client{ - Timeout: config.DownloadTimeout, - } - } - - if fileService == nil { - fileService = &osFileService{} - } - - metrics := &sourceMapMetrics{ - cacheSize: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "app_agent_receiver_sourcemap_cache_size", - Help: "number of items in source map cache, per origin", - }, []string{"origin"}), - downloads: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "app_agent_receiver_sourcemap_downloads_total", - Help: "downloads by the source map service", - }, []string{"origin", "http_status"}), - fileReads: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "app_agent_receiver_sourcemap_file_reads_total", - Help: "source map file reads from file system, by origin and status", - }, []string{"origin", "status"}), - } - reg.MustRegister(metrics.cacheSize, metrics.downloads, metrics.fileReads) - - fileLocations := []*sourcemapFileLocation{} - - for _, configLocation := range config.FileSystem { - tpl, err := template.New(configLocation.Path).Parse(configLocation.Path) - if err != nil { - panic(err) - } - - fileLocations = append(fileLocations, &sourcemapFileLocation{ - SourceMapFileLocation: configLocation, - pathTemplate: tpl, - }) - } - - return &RealSourceMapStore{ - l: l, - httpClient: httpClient, - fileService: fileService, - config: config, - cache: make(map[string]*SourceMap), - metrics: metrics, - fileLocations: fileLocations, - } -} - -func (store *RealSourceMapStore) downloadFileContents(url string) ([]byte, error) { - resp, err := store.httpClient.Get(url) - if err != nil { - store.metrics.downloads.WithLabelValues(getOrigin(url), "?").Inc() - return nil, err - } - defer resp.Body.Close() - store.metrics.downloads.WithLabelValues(getOrigin(url), fmt.Sprint(resp.StatusCode)).Inc() - if resp.StatusCode != 200 { - return nil, fmt.Errorf("unexpected status %v", resp.StatusCode) - } - body, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } - return body, nil -} - -func (store *RealSourceMapStore) downloadSourceMapContent(sourceURL string) (content []byte, resolvedSourceMapURL string, err error) { - level.Debug(store.l).Log("msg", "attempting to download source file", "url", sourceURL) - - result, err := store.downloadFileContents(sourceURL) - if err != nil { - level.Debug(store.l).Log("msg", "failed to download source file", "url", sourceURL, "err", err) - return nil, "", err - } - r := regexp.MustCompile(reSourceMap) - match := r.FindAllStringSubmatch(string(result), -1) - if len(match) == 0 { - level.Debug(store.l).Log("msg", "no source map url found in source", "url", sourceURL) - return nil, "", nil - } - sourceMapURL := match[len(match)-1][2] - - // inline sourcemap - if strings.HasPrefix(sourceMapURL, "data:") { - dataURL, err := dataurl.DecodeString(sourceMapURL) - if err != nil { - level.Debug(store.l).Log("msg", "failed to parse inline source map data url", "url", sourceURL, "err", err) - return nil, "", err - } - - level.Info(store.l).Log("msg", "successfully parsed inline source map data url", "url", sourceURL) - return dataURL.Data, sourceURL + ".map", nil - } - // remote sourcemap - resolvedSourceMapURL = sourceMapURL - - // if url is relative, attempt to resolve absolute - if !strings.HasPrefix(resolvedSourceMapURL, 
"http") { - base, err := url.Parse(sourceURL) - if err != nil { - level.Debug(store.l).Log("msg", "failed to parse source url", "url", sourceURL, "err", err) - return nil, "", err - } - relative, err := url.Parse(sourceMapURL) - if err != nil { - level.Debug(store.l).Log("msg", "failed to parse source map url", "url", sourceURL, "sourceMapURL", sourceMapURL, "err", err) - return nil, "", err - } - resolvedSourceMapURL = base.ResolveReference(relative).String() - level.Debug(store.l).Log("msg", "resolved absolute source map url", "url", sourceURL, "sourceMapURL", resolvedSourceMapURL) - } - level.Debug(store.l).Log("msg", "attempting to download source map file", "url", resolvedSourceMapURL) - result, err = store.downloadFileContents(resolvedSourceMapURL) - if err != nil { - level.Debug(store.l).Log("failed to download source map file", "url", resolvedSourceMapURL, "err", err) - return nil, "", err - } - return result, resolvedSourceMapURL, nil -} - -func (store *RealSourceMapStore) getSourceMapFromFileSystem(sourceURL string, release string, fileconf *sourcemapFileLocation) (content []byte, sourceMapURL string, err error) { - if len(sourceURL) == 0 || !strings.HasPrefix(sourceURL, fileconf.MinifiedPathPrefix) || strings.HasSuffix(sourceURL, "/") { - return nil, "", nil - } - - var rootPath bytes.Buffer - - err = fileconf.pathTemplate.Execute(&rootPath, struct{ Release string }{Release: cleanFilePathPart(release)}) - if err != nil { - return nil, "", err - } - - pathParts := []string{rootPath.String()} - for _, part := range strings.Split(strings.TrimPrefix(strings.Split(sourceURL, "?")[0], fileconf.MinifiedPathPrefix), "/") { - if len(part) > 0 && part != "." && part != ".." { - pathParts = append(pathParts, part) - } - } - mapFilePath := filepath.Join(pathParts...) 
+ ".map" - - if _, err := store.fileService.Stat(mapFilePath); err != nil { - store.metrics.fileReads.WithLabelValues(getOrigin(sourceURL), "not_found").Inc() - level.Debug(store.l).Log("msg", "source map not found on filesystem", "url", sourceURL, "file_path", mapFilePath) - return nil, "", nil - } - level.Debug(store.l).Log("msg", "source map found on filesystem", "url", mapFilePath, "file_path", mapFilePath) - - content, err = store.fileService.ReadFile(mapFilePath) - if err != nil { - store.metrics.fileReads.WithLabelValues(getOrigin(sourceURL), "error").Inc() - } else { - store.metrics.fileReads.WithLabelValues(getOrigin(sourceURL), "ok").Inc() - } - return content, sourceURL, err -} - -func (store *RealSourceMapStore) getSourceMapContent(sourceURL string, release string) (content []byte, sourceMapURL string, err error) { - //attempt to find in fs - for _, fileconf := range store.fileLocations { - content, sourceMapURL, err = store.getSourceMapFromFileSystem(sourceURL, release, fileconf) - if content != nil || err != nil { - return content, sourceMapURL, err - } - } - - //attempt to download - if strings.HasPrefix(sourceURL, "http") && urlMatchesOrigins(sourceURL, store.config.DownloadFromOrigins) { - return store.downloadSourceMapContent(sourceURL) - } - return nil, "", nil -} - -// GetSourceMap returns sourcemap for a given source url -func (store *RealSourceMapStore) GetSourceMap(sourceURL string, release string) (*SourceMap, error) { - store.Lock() - defer store.Unlock() - - cacheKey := fmt.Sprintf("%s__%s", sourceURL, release) - - if smap, ok := store.cache[cacheKey]; ok { - return smap, nil - } - content, sourceMapURL, err := store.getSourceMapContent(sourceURL, release) - if err != nil || content == nil { - store.cache[cacheKey] = nil - return nil, err - } - if content != nil { - consumer, err := sourcemap.Parse(sourceMapURL, content) - if err != nil { - store.cache[cacheKey] = nil - level.Debug(store.l).Log("msg", "failed to parse source map", "url", sourceMapURL, "release", release, "err", err) - return nil, err - } - level.Info(store.l).Log("msg", "successfully parsed source map", "url", sourceMapURL, "release", release) - smap := &SourceMap{ - consumer: consumer, - } - store.cache[cacheKey] = smap - store.metrics.cacheSize.WithLabelValues(getOrigin(sourceURL)).Inc() - return smap, nil - } - return nil, nil -} - -// ResolveSourceLocation resolves minified source location to original source location -func ResolveSourceLocation(store SourceMapStore, frame *Frame, release string) (*Frame, error) { - smap, err := store.GetSourceMap(frame.Filename, release) - if err != nil { - return nil, err - } - if smap == nil { - return nil, nil - } - - file, function, line, col, ok := smap.consumer.Source(frame.Lineno, frame.Colno) - if !ok { - return nil, nil - } - // unfortunately in many cases go-sourcemap fails to determine the original function name. - // not a big issue as long as file, line and column are correct - if len(function) == 0 { - function = "?" 
- } - return &Frame{ - Filename: file, - Lineno: line, - Colno: col, - Function: function, - }, nil -} - -// TransformException will attempt to resolve all minified source locations in the stacktrace with original source locations -func TransformException(store SourceMapStore, log log.Logger, ex *Exception, release string) *Exception { - if ex.Stacktrace == nil { - return ex - } - frames := []Frame{} - - for _, frame := range ex.Stacktrace.Frames { - mappedFrame, err := ResolveSourceLocation(store, &frame, release) - if err != nil { - level.Error(log).Log("msg", "Error resolving stack trace frame source location", "err", err) - frames = append(frames, frame) - } else if mappedFrame != nil { - frames = append(frames, *mappedFrame) - } else { - frames = append(frames, frame) - } - } - - return &Exception{ - Type: ex.Type, - Value: ex.Value, - Stacktrace: &Stacktrace{Frames: frames}, - Timestamp: ex.Timestamp, - } -} - -func cleanFilePathPart(x string) string { - return strings.TrimLeft(strings.ReplaceAll(strings.ReplaceAll(x, "\\", ""), "/", ""), ".") -} - -func getOrigin(URL string) string { - parsed, err := url.Parse(URL) - if err != nil { - return "?" - } - return fmt.Sprintf("%s://%s", parsed.Scheme, parsed.Host) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/sourcemaps_test.go b/internal/static/integrations/v2/app_agent_receiver/sourcemaps_test.go deleted file mode 100644 index e9f7a5bfd6..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/sourcemaps_test.go +++ /dev/null @@ -1,495 +0,0 @@ -package app_agent_receiver - -import ( - "bytes" - "errors" - "io" - "io/fs" - "net/http" - "path/filepath" - "testing" - - "github.com/go-kit/log" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" -) - -type mockHTTPClient struct { - responses []struct { - *http.Response - error - } - requests []string -} - -func (cl *mockHTTPClient) Get(url string) (resp *http.Response, err error) { - if len(cl.responses) > len(cl.requests) { - r := cl.responses[len(cl.requests)] - cl.requests = append(cl.requests, url) - return r.Response, r.error - } - return nil, errors.New("mockHTTPClient got more requests than expected") -} - -type mockFileService struct { - files map[string][]byte - stats []string - reads []string -} - -func (s *mockFileService) Stat(name string) (fs.FileInfo, error) { - s.stats = append(s.stats, name) - _, ok := s.files[name] - if !ok { - return nil, errors.New("file not found") - } - return nil, nil -} - -func (s *mockFileService) ReadFile(name string) ([]byte, error) { - s.reads = append(s.reads, name) - content, ok := s.files[name] - if ok { - return content, nil - } - return nil, errors.New("file not found") -} - -func newResponseFromTestData(t *testing.T, file string) *http.Response { - return &http.Response{ - Body: io.NopCloser(bytes.NewReader(loadTestData(t, file))), - StatusCode: 200, - } -} - -func mockException() *Exception { - return &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://localhost:1234/foo.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 5, - Filename: "http://localhost:1234/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - }, - }, - } -} - -func Test_RealSourceMapStore_DownloadSuccess(t *testing.T) { - conf := SourceMapConfig{ - Download: true, - DownloadFromOrigins: []string{"*"}, - } - - httpClient := &mockHTTPClient{ - responses: []struct { - *http.Response - error - }{ - {newResponseFromTestData(t, "foo.js"), nil}, - 
{newResponseFromTestData(t, "foo.js.map"), nil}, - }, - } - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), httpClient, &mockFileService{}) - - exception := mockException() - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{"http://localhost:1234/foo.js", "http://localhost:1234/foo.js.map"}, httpClient.requests) - - expected := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 37, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 6, - }, - { - Colno: 2, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 7, - }, - }, - }, - } - - require.Equal(t, *expected, *transformed) -} - -func Test_RealSourceMapStore_DownloadError(t *testing.T) { - conf := SourceMapConfig{ - Download: true, - DownloadFromOrigins: []string{"*"}, - } - - resp := &http.Response{ - StatusCode: 500, - Body: io.NopCloser(bytes.NewReader([]byte{})), - } - - httpClient := &mockHTTPClient{ - responses: []struct { - *http.Response - error - }{ - {resp, nil}, - }, - } - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), httpClient, &mockFileService{}) - - exception := mockException() - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{"http://localhost:1234/foo.js"}, httpClient.requests) - require.Equal(t, exception, transformed) -} - -func Test_RealSourceMapStore_DownloadHTTPOriginFiltering(t *testing.T) { - conf := SourceMapConfig{ - Download: true, - DownloadFromOrigins: []string{"http://bar.com/"}, - } - - httpClient := &mockHTTPClient{ - responses: []struct { - *http.Response - error - }{ - {newResponseFromTestData(t, "foo.js"), nil}, - {newResponseFromTestData(t, "foo.js.map"), nil}, - }, - } - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), httpClient, &mockFileService{}) - - exception := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/foo.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 5, - Filename: "http://bar.com/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - }, - }, - } - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{"http://bar.com/foo.js", "http://bar.com/foo.js.map"}, httpClient.requests) - - expected := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/foo.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 2, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 7, - }, - }, - }, - } - - require.Equal(t, *expected, *transformed) -} - -func Test_RealSourceMapStore_ReadFromFileSystem(t *testing.T) { - conf := SourceMapConfig{ - Download: false, - FileSystem: []SourceMapFileLocation{ - { - MinifiedPathPrefix: "http://foo.com/", - Path: filepath.FromSlash("/var/build/latest/"), - }, - { - MinifiedPathPrefix: "http://bar.com/", - Path: filepath.FromSlash("/var/build/{{ .Release }}/"), - }, - }, - } - - mapFile := loadTestData(t, "foo.js.map") - - fileService := &mockFileService{ - files: map[string][]byte{ - filepath.FromSlash("/var/build/latest/foo.js.map"): mapFile, - filepath.FromSlash("/var/build/123/foo.js.map"): mapFile, - }, - } - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, 
conf, prometheus.NewRegistry(), &mockHTTPClient{}, fileService) - - exception := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/foo.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 6, - Filename: "http://foo.com/bar.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 5, - Filename: "http://bar.com/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - { - Colno: 5, - Filename: "http://baz.com/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - }, - }, - } - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{ - filepath.FromSlash("/var/build/latest/foo.js.map"), - filepath.FromSlash("/var/build/latest/bar.js.map"), - filepath.FromSlash("/var/build/123/foo.js.map"), - }, fileService.stats) - require.Equal(t, []string{ - filepath.FromSlash("/var/build/latest/foo.js.map"), - filepath.FromSlash("/var/build/123/foo.js.map"), - }, fileService.reads) - - expected := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 37, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 6, - }, - { - Colno: 6, - Filename: "http://foo.com/bar.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 2, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 7, - }, - { - Colno: 5, - Filename: "http://baz.com/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - }, - }, - } - - require.Equal(t, *expected, *transformed) -} - -func Test_RealSourceMapStore_ReadFromFileSystemAndDownload(t *testing.T) { - conf := SourceMapConfig{ - Download: true, - DownloadFromOrigins: []string{"*"}, - FileSystem: []SourceMapFileLocation{ - { - MinifiedPathPrefix: "http://foo.com/", - Path: filepath.FromSlash("/var/build/latest/"), - }, - }, - } - - mapFile := loadTestData(t, "foo.js.map") - - fileService := &mockFileService{ - files: map[string][]byte{ - filepath.FromSlash("/var/build/latest/foo.js.map"): mapFile, - }, - } - - httpClient := &mockHTTPClient{ - responses: []struct { - *http.Response - error - }{ - {newResponseFromTestData(t, "foo.js"), nil}, - {newResponseFromTestData(t, "foo.js.map"), nil}, - }, - } - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), httpClient, fileService) - - exception := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/foo.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 5, - Filename: "http://bar.com/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - }, - }, - } - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{filepath.FromSlash("/var/build/latest/foo.js.map")}, fileService.stats) - require.Equal(t, []string{filepath.FromSlash("/var/build/latest/foo.js.map")}, fileService.reads) - require.Equal(t, []string{"http://bar.com/foo.js", "http://bar.com/foo.js.map"}, httpClient.requests) - - expected := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 37, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 6, - }, - { - Colno: 2, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 7, - }, - }, - }, - } - - require.Equal(t, *expected, *transformed) -} - -func Test_RealSourceMapStore_FilepathSanitized(t *testing.T) { - conf := SourceMapConfig{ - Download: false, - FileSystem: []SourceMapFileLocation{ - { - MinifiedPathPrefix: 
"http://foo.com/", - Path: filepath.FromSlash("/var/build/latest/"), - }, - }, - } - - fileService := &mockFileService{} - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), &mockHTTPClient{}, fileService) - - exception := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/../../../etc/passwd", - Function: "eval", - Lineno: 5, - }, - }, - }, - } - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{ - filepath.FromSlash("/var/build/latest/etc/passwd.map"), - }, fileService.stats) - require.Len(t, fileService.reads, 0) - - require.Equal(t, *exception, *transformed) -} - -func Test_RealSourceMapStore_FilepathQueryParamsOmitted(t *testing.T) { - conf := SourceMapConfig{ - Download: false, - FileSystem: []SourceMapFileLocation{ - { - MinifiedPathPrefix: "http://foo.com/", - Path: filepath.FromSlash("/var/build/latest/"), - }, - }, - } - - fileService := &mockFileService{} - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), &mockHTTPClient{}, fileService) - - exception := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/static/foo.js?v=1233", - Function: "eval", - Lineno: 5, - }, - }, - }, - } - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{ - filepath.FromSlash("/var/build/latest/static/foo.js.map"), - }, fileService.stats) - require.Len(t, fileService.reads, 0) - - require.Equal(t, *exception, *transformed) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js b/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js deleted file mode 100644 index b38652a4ee..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js +++ /dev/null @@ -1,39 +0,0 @@ -function throwError() { - throw new Error('This is a thrown error'); -} -function callUndefined() { - // eslint-disable-next-line no-eval - eval('test();'); -} -function callConsole(method) { - // eslint-disable-next-line no-console - console[method](`This is a console ${method} message`); -} -function fetchError() { - fetch('http://localhost:12345', { - method: 'POST' - }); -} -function promiseReject() { - new Promise((_accept, reject)=>{ - reject('This is a rejected promise'); - }); -} -function fetchSuccess() { - fetch('http://localhost:1234'); -} -function sendCustomMetric() { - window.grafanaJavaScriptAgent.api.pushMeasurement({ - type: 'custom', - values: { - my_custom_metric: Math.random() - } - }); -} -window.addEventListener('load', ()=>{ - window.grafanaJavaScriptAgent.api.pushLog([ - 'Manual event from Home' - ]); -}); - -//# sourceMappingURL=foo.js.map diff --git a/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js.map b/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js.map deleted file mode 100644 index 0cd4998974..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js.map +++ /dev/null @@ -1 +0,0 @@ 
-{"mappings":"SAAS,UAAU,GAAG,CAAC;IACrB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAwB;AAC1C,CAAC;SAEQ,aAAa,GAAG,CAAC;IACxB,EAAmC,AAAnC,iCAAmC;IACnC,IAAI,CAAC,CAAS;AAChB,CAAC;SAEQ,WAAW,CAAC,MAAmD,EAAE,CAAC;IACzE,EAAsC,AAAtC,oCAAsC;IACtC,OAAO,CAAC,MAAM,GAAG,kBAAkB,EAAE,MAAM,CAAC,QAAQ;AACtD,CAAC;SAEQ,UAAU,GAAG,CAAC;IACrB,KAAK,CAAC,CAAwB,yBAAE,CAAC;QAC/B,MAAM,EAAE,CAAM;IAChB,CAAC;AACH,CAAC;SAEQ,aAAa,GAAG,CAAC;IACxB,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,GAAK,CAAC;QAChC,MAAM,CAAC,CAA4B;IACrC,CAAC;AACH,CAAC;SAEQ,YAAY,GAAG,CAAC;IACvB,KAAK,CAAC,CAAuB;AAC/B,CAAC;SAEQ,gBAAgB,GAAG,CAAC;IAC1B,MAAM,CAAS,sBAAsB,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;QAC1D,IAAI,EAAE,CAAQ;QACd,MAAM,EAAE,CAAC;YACP,gBAAgB,EAAE,IAAI,CAAC,MAAM;QAC/B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,gBAAgB,CAAC,CAAM,WAAQ,CAAC;IACpC,MAAM,CAAS,sBAAsB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAAA,CAAwB;IAAA,CAAC;AAC/E,CAAC","sources":["demo/src/actions.ts"],"sourcesContent":["function throwError() {\n throw new Error('This is a thrown error');\n}\n\nfunction callUndefined() {\n // eslint-disable-next-line no-eval\n eval('test();');\n}\n\nfunction callConsole(method: 'trace' | 'info' | 'log' | 'warn' | 'error') {\n // eslint-disable-next-line no-console\n console[method](`This is a console ${method} message`);\n}\n\nfunction fetchError() {\n fetch('http://localhost:12345', {\n method: 'POST',\n });\n}\n\nfunction promiseReject() {\n new Promise((_accept, reject) => {\n reject('This is a rejected promise');\n });\n}\n\nfunction fetchSuccess() {\n fetch('http://localhost:1234');\n}\n\nfunction sendCustomMetric() {\n (window as any).grafanaJavaScriptAgent.api.pushMeasurement({\n type: 'custom',\n values: {\n my_custom_metric: Math.random(),\n },\n });\n}\n\nwindow.addEventListener('load', () => {\n (window as any).grafanaJavaScriptAgent.api.pushLog(['Manual event from Home']);\n});\n"],"names":[],"version":3,"file":"index.28a7d598.js.map","sourceRoot":"/__parcel_source_root/"} \ No newline at end of file diff --git a/internal/static/integrations/v2/app_agent_receiver/testdata/payload.json b/internal/static/integrations/v2/app_agent_receiver/testdata/payload.json deleted file mode 100644 index b6ac7efce0..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/testdata/payload.json +++ /dev/null @@ -1,330 +0,0 @@ -{ - "logs": [ - { - "message": "opened pricing page", - "level": "info", - "context": { - "component": "AppRoot", - "page": "Pricing" - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - } - }, - { - "message": "loading price list", - "level": "trace", - "context": { - "component": "AppRoot", - "page": "Pricing" - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "ghj" - } - } - ], - "exceptions": [ - { - "type": "Error", - "value": "Cannot read property 'find' of undefined", - "stacktrace": { - "frames": [ - { - "colno": 42, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "in_app": true, - "lineno": 8639 - }, - { - "colno": 9, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "dispatchAction", - "in_app": true, - "lineno": 268095 - }, - { - "colno": 13, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "scheduleUpdateOnFiber", - "in_app": true, - "lineno": 273726 - }, - { - "colno": 7, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "flushSyncCallbackQueue", - "in_app": true, - "lineno": 263362 - }, - { - "colno": 13, - "filename": 
"http://fe:3002/static/js/vendors~main.chunk.js", - "function": "flushSyncCallbackQueueImpl", - "in_app": true, - "lineno": 263374 - }, - { - "colno": 14, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "runWithPriority$1", - "lineno": 263325 - }, - { - "colno": 16, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "unstable_runWithPriority", - "lineno": 291265 - }, - { - "colno": 30, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "lineno": 263379 - }, - { - "colno": 22, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "performSyncWorkOnRoot", - "lineno": 274126 - }, - { - "colno": 11, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "renderRootSync", - "lineno": 274509 - }, - { - "colno": 9, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "workLoopSync", - "lineno": 274543 - }, - { - "colno": 16, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "performUnitOfWork", - "lineno": 274606 - }, - { - "colno": 18, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "beginWork$1", - "in_app": true, - "lineno": 275746 - }, - { - "colno": 20, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "beginWork", - "lineno": 270944 - }, - { - "colno": 24, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "updateFunctionComponent", - "lineno": 269291 - }, - { - "colno": 22, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "renderWithHooks", - "lineno": 266969 - }, - { - "colno": 74, - "filename": "http://fe:3002/static/js/main.chunk.js", - "function": "?", - "in_app": true, - "lineno": 2600 - }, - { - "colno": 65, - "filename": "http://fe:3002/static/js/main.chunk.js", - "function": "useGetBooksQuery", - "lineno": 1299 - }, - { - "colno": 85, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "Module.useQuery", - "lineno": 8495 - }, - { - "colno": 83, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "useBaseQuery", - "in_app": true, - "lineno": 8656 - }, - { - "colno": 14, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "useDeepMemo", - "lineno": 8696 - }, - { - "colno": 55, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "lineno": 8657 - }, - { - "colno": 47, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData.execute", - "in_app": true, - "lineno": 7883 - }, - { - "colno": 23, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData.getExecuteResult", - "lineno": 7944 - }, - { - "colno": 19, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData._this.getQueryResult", - "lineno": 7790 - }, - { - "colno": 24, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "new ApolloError", - "in_app": true, - "lineno": 5164 - } - ] - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - }, - "context": { - "component": "ReactErrorBoundary", - "ReactError": "Annoying Error" - } - } - ], - "measurements": [ - { - "type": "foobar", - "values": { - "ttfp": 20.12, - "ttfcp": 22.12, - "ttfb": 14 - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - }, - 
"context": { - "hello": "world" - } - } - ], - "events": [ - { - "name": "click_login_button", - "domain": "frontend", - "attributes": { - "foo": "bar", - "one": "two" - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - } - }, - { - "name": "click_reset_password_button", - "timestamp": "2021-09-30T10:46:17.680Z" - } - ], - "meta": { - "sdk": { - "name": "grafana-frontend-agent", - "version": "1.0.0" - }, - "app": { - "name": "testapp", - "release": "0.8.2", - "version": "abcdefg", - "environment": "production" - }, - "user": { - "username": "domasx2", - "id": "123", - "email": "geralt@kaermorhen.org", - "attributes": { - "foo": "bar" - } - }, - "session": { - "id": "abcd", - "attributes": { - "time_elapsed": "100s" - } - }, - "page": { - "url": "https://example.com/page" - }, - "browser": { - "name": "chrome", - "version": "88.12.1", - "os": "linux", - "mobile": false - }, - "view": { - "name": "foobar" - } - }, - "traces": { - "resourceSpans": [ - { - "resource": { - "attributes": [ - { - "key": "host.name", - "value": { - "stringValue": "testHost" - } - } - ] - }, - "instrumentationLibrarySpans": [ - { - "instrumentationLibrary": { - "name": "name", - "version": "version" - }, - "spans": [ - { - "traceId": "", - "spanId": "", - "parentSpanId": "", - "name": "testSpan", - "status": {} - }, - { - "traceId": "", - "spanId": "", - "parentSpanId": "", - "name": "testSpan2", - "status": {} - } - ] - } - ] - } - ] - } -} diff --git a/internal/static/integrations/v2/app_agent_receiver/testdata/payload_2.json b/internal/static/integrations/v2/app_agent_receiver/testdata/payload_2.json deleted file mode 100644 index eb8b18e565..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/testdata/payload_2.json +++ /dev/null @@ -1,393 +0,0 @@ -{ - "logs": [ - { - "message": "opened pricing page", - "level": "info", - "context": { - "component": "AppRoot", - "page": "Pricing" - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - } - }, - { - "message": "loading price list", - "level": "trace", - "context": { - "component": "AppRoot", - "page": "Pricing" - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "ghj" - } - } - ], - "exceptions": [ - { - "type": "Error", - "value": "Cannot read property 'find' of undefined", - "stacktrace": { - "frames": [ - { - "colno": 42, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "in_app": true, - "lineno": 8639 - }, - { - "colno": 9, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "dispatchAction", - "in_app": true, - "lineno": 268095 - }, - { - "colno": 13, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "scheduleUpdateOnFiber", - "in_app": true, - "lineno": 273726 - }, - { - "colno": 7, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "flushSyncCallbackQueue", - "in_app": true, - "lineno": 263362 - }, - { - "colno": 13, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "flushSyncCallbackQueueImpl", - "in_app": true, - "lineno": 263374 - }, - { - "colno": 14, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "runWithPriority$1", - "lineno": 263325 - }, - { - "colno": 16, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "unstable_runWithPriority", - "lineno": 291265 - }, - { - "colno": 30, - "filename": 
"http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "lineno": 263379 - }, - { - "colno": 22, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "performSyncWorkOnRoot", - "lineno": 274126 - }, - { - "colno": 11, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "renderRootSync", - "lineno": 274509 - }, - { - "colno": 9, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "workLoopSync", - "lineno": 274543 - }, - { - "colno": 16, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "performUnitOfWork", - "lineno": 274606 - }, - { - "colno": 18, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "beginWork$1", - "in_app": true, - "lineno": 275746 - }, - { - "colno": 20, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "beginWork", - "lineno": 270944 - }, - { - "colno": 24, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "updateFunctionComponent", - "lineno": 269291 - }, - { - "colno": 22, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "renderWithHooks", - "lineno": 266969 - }, - { - "colno": 74, - "filename": "http://fe:3002/static/js/main.chunk.js", - "function": "?", - "in_app": true, - "lineno": 2600 - }, - { - "colno": 65, - "filename": "http://fe:3002/static/js/main.chunk.js", - "function": "useGetBooksQuery", - "lineno": 1299 - }, - { - "colno": 85, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "Module.useQuery", - "lineno": 8495 - }, - { - "colno": 83, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "useBaseQuery", - "in_app": true, - "lineno": 8656 - }, - { - "colno": 14, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "useDeepMemo", - "lineno": 8696 - }, - { - "colno": 55, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "lineno": 8657 - }, - { - "colno": 47, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData.execute", - "in_app": true, - "lineno": 7883 - }, - { - "colno": 23, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData.getExecuteResult", - "lineno": 7944 - }, - { - "colno": 19, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData._this.getQueryResult", - "lineno": 7790 - }, - { - "colno": 24, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "new ApolloError", - "in_app": true, - "lineno": 5164 - } - ] - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - } - } - ], - "measurements": [ - { - "values": { - "ttfp": 20.12, - "ttfcp": 22.12, - "ttfb": 14 - }, - "type": "page load", - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - } - } - ], - "meta": { - "sdk": { - "name": "grafana-frontend-agent", - "version": "1.0.0" - }, - "app": { - "name": "testapp", - "release": "0.8.2", - "version": "abcdefg", - "environment": "production" - }, - "user": { - "username": "domasx2", - "attributes": { - "foo": "bar" - } - }, - "session": { - "id": "abcd", - "attributes": { - "time_elapsed": "100s" - } - }, - "page": { - "url": "https://example.com/page" - }, - "browser": { - "name": "chrome", - "version": "88.12.1", - "os": "linux", - "mobile": false - }, - "view": { - "name": "foobar" - } - 
}, - "traces": { - "resourceSpans": [ - { - "resource": { - "attributes": [ - { - "key": "service.name", - "value": { - "stringValue": "unknown_service" - } - }, - { - "key": "telemetry.sdk.language", - "value": { - "stringValue": "webjs" - } - }, - { - "key": "telemetry.sdk.name", - "value": { - "stringValue": "opentelemetry" - } - }, - { - "key": "telemetry.sdk.version", - "value": { - "stringValue": "1.0.1" - } - } - ], - "droppedAttributesCount": 0 - }, - "instrumentationLibrarySpans": [ - { - "spans": [ - { - "traceId": "2d6f18da2663c7e477df23d8a8ad95b7", - "spanId": "50e64e3fac969cbb", - "parentSpanId": "9d9da6529d56706c", - "name": "documentFetch", - "kind": 1, - "startTimeUnixNano": 1646228314336100000, - "endTimeUnixNano": 1646228314351000000, - "attributes": [ - { - "key": "component", - "value": { - "stringValue": "document-load" - } - }, - { - "key": "http.response_content_length", - "value": { - "intValue": 1326 - } - } - ], - "droppedAttributesCount": 0, - "events": [ - { - "timeUnixNano": 1646228314336100000, - "name": "fetchStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314342000000, - "name": "domainLookupStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314342000000, - "name": "domainLookupEnd", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314342000000, - "name": "connectStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314330100000, - "name": "secureConnectionStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314342500000, - "name": "connectEnd", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314342700000, - "name": "requestStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314347000000, - "name": "responseStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314351000000, - "name": "responseEnd", - "attributes": [], - "droppedAttributesCount": 0 - } - ], - "droppedEventsCount": 0, - "status": { - "code": 0 - }, - "links": [], - "droppedLinksCount": 0 - } - ], - "instrumentationLibrary": { - "name": "@opentelemetry/instrumentation-document-load", - "version": "0.27.1" - } - } - ] - } - ] - } -} diff --git a/internal/static/integrations/v2/app_agent_receiver/traces_exporter.go b/internal/static/integrations/v2/app_agent_receiver/traces_exporter.go deleted file mode 100644 index 941f829452..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/traces_exporter.go +++ /dev/null @@ -1,41 +0,0 @@ -package app_agent_receiver - -import ( - "context" - - "go.opentelemetry.io/collector/consumer" -) - -type tracesConsumerGetter func() (consumer.Traces, error) - -// TracesExporter will send traces to a traces instance -type TracesExporter struct { - getTracesConsumer tracesConsumerGetter -} - -// NewTracesExporter creates a trace exporter for the app agent receiver. 
-func NewTracesExporter(getTracesConsumer tracesConsumerGetter) AppAgentReceiverExporter { - return &TracesExporter{getTracesConsumer} -} - -// Name of the exporter, for logging purposes -func (te *TracesExporter) Name() string { - return "traces exporter" -} - -// Export implements the AppDataExporter interface -func (te *TracesExporter) Export(ctx context.Context, payload Payload) error { - if payload.Traces == nil { - return nil - } - consumer, err := te.getTracesConsumer() - if err != nil { - return err - } - return consumer.ConsumeTraces(ctx, payload.Traces.Traces) -} - -// Static typecheck tests -var ( - _ AppAgentReceiverExporter = (*TracesExporter)(nil) -) diff --git a/internal/static/integrations/v2/app_agent_receiver/traces_test.go b/internal/static/integrations/v2/app_agent_receiver/traces_test.go deleted file mode 100644 index 3e46227c45..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/traces_test.go +++ /dev/null @@ -1,53 +0,0 @@ -package app_agent_receiver - -import ( - "context" - "errors" - "testing" - - "github.com/stretchr/testify/require" - "go.opentelemetry.io/collector/consumer" - "go.opentelemetry.io/collector/pdata/ptrace" -) - -type mockTracesConsumer struct { - consumed []ptrace.Traces -} - -func (c *mockTracesConsumer) Capabilities() consumer.Capabilities { - return consumer.Capabilities{MutatesData: false} -} - -func (c *mockTracesConsumer) ConsumeTraces(ctx context.Context, td ptrace.Traces) error { - c.consumed = append(c.consumed, td) - return nil -} - -func Test_exportTraces_success(t *testing.T) { - ctx := context.Background() - tracesConsumer := &mockTracesConsumer{} - exporter := NewTracesExporter(func() (consumer.Traces, error) { return tracesConsumer, nil }) - payload := loadTestPayload(t) - err := exporter.Export(ctx, payload) - require.NoError(t, err) - require.Len(t, tracesConsumer.consumed, 1) -} - -func Test_exportTraces_noTracesInpayload(t *testing.T) { - ctx := context.Background() - tracesConsumer := &mockTracesConsumer{consumed: nil} - exporter := NewTracesExporter(func() (consumer.Traces, error) { return tracesConsumer, nil }) - payload := loadTestPayload(t) - payload.Traces = nil - err := exporter.Export(ctx, payload) - require.NoError(t, err) - require.Len(t, tracesConsumer.consumed, 0) -} - -func Test_exportTraces_noConsumer(t *testing.T) { - ctx := context.Background() - exporter := NewTracesExporter(func() (consumer.Traces, error) { return nil, errors.New("it don't work") }) - payload := loadTestPayload(t) - err := exporter.Export(ctx, payload) - require.Error(t, err, "it don't work") -} diff --git a/internal/static/integrations/v2/app_agent_receiver/utils.go b/internal/static/integrations/v2/app_agent_receiver/utils.go deleted file mode 100644 index e716cb2043..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/utils.go +++ /dev/null @@ -1,84 +0,0 @@ -package app_agent_receiver - -import ( - "fmt" - "sort" - - "github.com/grafana/agent/internal/util/wildcard" - om "github.com/wk8/go-ordered-map" -) - -// KeyVal is an ordered map of string to interface -type KeyVal = om.OrderedMap - -// NewKeyVal creates a new empty KeyVal -func NewKeyVal() *KeyVal { - return om.New() -} - -// KeyValFromMap will instantiate KeyVal from a map[string]string -func KeyValFromMap(m map[string]string) *KeyVal { - kv := NewKeyVal() - keys := make([]string, 0, len(m)) - for k := range m { - keys = append(keys, k) - } - sort.Strings(keys) - for _, k := range keys { - KeyValAdd(kv, k, m[k]) - } - return kv -} - -// MergeKeyVal will
merge source into target -func MergeKeyVal(target *KeyVal, source *KeyVal) { - for el := source.Oldest(); el != nil; el = el.Next() { - target.Set(el.Key, el.Value) - } -} - -// MergeKeyValWithPrefix will merge source into target, adding a prefix to each key being merged in -func MergeKeyValWithPrefix(target *KeyVal, source *KeyVal, prefix string) { - for el := source.Oldest(); el != nil; el = el.Next() { - target.Set(fmt.Sprintf("%s%s", prefix, el.Key), el.Value) - } -} - -// KeyValAdd adds a key + value string pair to kv -func KeyValAdd(kv *KeyVal, key string, value string) { - if len(value) > 0 { - kv.Set(key, value) - } -} - -// KeyValToInterfaceSlice converts KeyVal to []interface{}, typically used for logging -func KeyValToInterfaceSlice(kv *KeyVal) []interface{} { - slice := make([]interface{}, kv.Len()*2) - idx := 0 - for el := kv.Oldest(); el != nil; el = el.Next() { - slice[idx] = el.Key - idx++ - slice[idx] = el.Value - idx++ - } - return slice -} - -// KeyValToInterfaceMap converts KeyVal to map[string]interface{} -func KeyValToInterfaceMap(kv *KeyVal) map[string]interface{} { - retv := make(map[string]interface{}) - for el := kv.Oldest(); el != nil; el = el.Next() { - retv[fmt.Sprint(el.Key)] = el.Value - } - return retv -} - -// urlMatchesOrigins returns true if URL matches at least one of the origin prefixes. Wildcards '*' and '?' are supported -func urlMatchesOrigins(URL string, origins []string) bool { - for _, origin := range origins { - if origin == "*" || wildcard.Match(origin+"*", URL) { - return true - } - } - return false -} diff --git a/internal/static/integrations/v2/app_agent_receiver/utils_test.go b/internal/static/integrations/v2/app_agent_receiver/utils_test.go deleted file mode 100644 index 6e32dd9626..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/utils_test.go +++ /dev/null @@ -1,36 +0,0 @@ -package app_agent_receiver - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func testCase(t *testing.T, URL string, origins []string, expected bool) { - result := urlMatchesOrigins(URL, origins) - require.Equal(t, expected, result) -} - -func Test_Origin_WildcardAlwaysMatches(t *testing.T) { - testCase(t, "http://example.com/static/foo.js", []string{"https://foo.com/", "*"}, true) -} - -func Test_Origin_Matches(t *testing.T) { - testCase(t, "http://example.com/static/foo.js", []string{"https://foo.com/", "http://example.com/"}, true) -} - -func Test_Origin_MatchesWithWildcard(t *testing.T) { - testCase(t, "http://foo.bar.com/static/foo.js", []string{"https://foo.com/", "http://*.bar.com/"}, true) -} - -func Test_Origin_DoesNotMatch(t *testing.T) { - testCase(t, "http://example.com/static/foo.js", []string{"https://foo.com/", "http://test.com/"}, false) -} - -func Test_Origin_DoesNotMatchWithWildcard(t *testing.T) { - testCase(t, "http://foo.bar.com/static/foo.js", []string{"https://foo.com/", "http://*.baz.com/"}, false) -} - -func Test_Origin_MatchesWithWildcardNoProtocol(t *testing.T) { - testCase(t, "http://foo.bar.com/static/foo.js", []string{"https://foo.com/", "*.bar.com/"}, true) -} diff --git a/internal/static/integrations/v2/autoscrape/appender.go b/internal/static/integrations/v2/autoscrape/appender.go deleted file mode 100644 index 04be1c7d6a..0000000000 --- a/internal/static/integrations/v2/autoscrape/appender.go +++ /dev/null @@ -1,42 +0,0 @@ -package autoscrape - -import ( - "fmt" - - "github.com/prometheus/prometheus/model/exemplar" - "github.com/prometheus/prometheus/model/histogram" -
"github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/metadata" - "github.com/prometheus/prometheus/storage" -) - -// failedAppender is used as the appender when an instance couldn't be found. -type failedAppender struct { - instanceName string -} - -var _ storage.Appender = (*failedAppender)(nil) - -func (fa *failedAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - return 0, fmt.Errorf("no such instance %s", fa.instanceName) -} - -func (fa *failedAppender) Commit() error { - return fmt.Errorf("no such instance %s", fa.instanceName) -} - -func (fa *failedAppender) Rollback() error { - return fmt.Errorf("no such instance %s", fa.instanceName) -} - -func (fa *failedAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { - return 0, fmt.Errorf("no such instance %s", fa.instanceName) -} - -func (fa *failedAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { - return 0, fmt.Errorf("no such instance %s", fa.instanceName) -} - -func (fa *failedAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { - return 0, fmt.Errorf("no such instance %s", fa.instanceName) -} diff --git a/internal/static/integrations/v2/autoscrape/autoscrape.go b/internal/static/integrations/v2/autoscrape/autoscrape.go index 8d1bd02ae0..5415f269ac 100644 --- a/internal/static/integrations/v2/autoscrape/autoscrape.go +++ b/internal/static/integrations/v2/autoscrape/autoscrape.go @@ -2,22 +2,9 @@ package autoscrape import ( - "context" - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/metrics" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/server" - "github.com/oklog/run" - config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" prom_config "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/model/relabel" - "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" ) // DefaultGlobal holds default values for Global. @@ -53,262 +40,9 @@ type Config struct { MetricRelabelConfigs []*relabel.Config `yaml:"metric_relabel_configs,omitempty"` // Relabel individual autoscrape metrics } -// InstanceStore is used to find instances to send metrics to. It is a subset -// of the pkg/metrics/instance.Manager interface. -type InstanceStore interface { - // GetInstance retrieves a ManagedInstance by name. - GetInstance(name string) (instance.ManagedInstance, error) -} - // ScrapeConfig bind a Prometheus scrape config with an instance to send // scraped metrics to. type ScrapeConfig struct { Instance string Config prom_config.ScrapeConfig } - -// Scraper is a metrics autoscraper. -type Scraper struct { - ctx context.Context - cancel context.CancelFunc - - log log.Logger - is InstanceStore - - // Prometheus doesn't pass contextual information at scrape time that could - // be used to change the behavior of generating an appender. This means that - // it's not yet possible for us to just run a single SD + scrape manager for - // all of our integrations, and we instead need to launch a pair of each for - // every instance we're writing to. 
- - iscrapersMut sync.RWMutex - iscrapers map[string]*instanceScraper - dialerFunc server.DialContextFunc -} - -// NewScraper creates a new autoscraper. Scraper will run until Stop is called. -// Instances to send scraped metrics to will be looked up via im. Scraping will -// use the provided dialerFunc to make connections if non-nil. -func NewScraper(l log.Logger, is InstanceStore, dialerFunc server.DialContextFunc) *Scraper { - l = log.With(l, "component", "autoscraper") - - ctx, cancel := context.WithCancel(context.Background()) - - s := &Scraper{ - ctx: ctx, - cancel: cancel, - - log: l, - is: is, - iscrapers: map[string]*instanceScraper{}, - dialerFunc: dialerFunc, - } - return s -} - -// ApplyConfig will apply the given jobs. An error will be returned for any -// jobs that failed to be applied. -func (s *Scraper) ApplyConfig(jobs []*ScrapeConfig) error { - s.iscrapersMut.Lock() - defer s.iscrapersMut.Unlock() - - var firstError error - saveError := func(e error) { - if firstError == nil { - firstError = e - } - } - - // Shard our jobs by target instance. - shardedJobs := map[string][]*prom_config.ScrapeConfig{} - for _, j := range jobs { - _, err := s.is.GetInstance(j.Instance) - if err != nil { - level.Error(s.log).Log("msg", "cannot autoscrape integration", "name", j.Config.JobName, "err", err) - saveError(err) - continue - } - - shardedJobs[j.Instance] = append(shardedJobs[j.Instance], &j.Config) - } - - // Then pass the jobs to instanceScraper, creating them if we need to. - for instance, jobs := range shardedJobs { - is, ok := s.iscrapers[instance] - if !ok { - is = newInstanceScraper(s.ctx, s.log, s.is, instance, config_util.DialContextFunc(s.dialerFunc)) - s.iscrapers[instance] = is - } - if err := is.ApplyConfig(jobs); err != nil { - // Not logging here; is.ApplyConfig already logged the errors. - saveError(err) - } - } - - // Garbage collect: If there's a key in s.scrapers that wasn't in - // shardedJobs, stop that unused scraper. - for instance, is := range s.iscrapers { - _, current := shardedJobs[instance] - if !current { - is.Stop() - delete(s.iscrapers, instance) - } - } - - return firstError -} - -// TargetsActive returns the set of active scrape targets for all target -// instances. -func (s *Scraper) TargetsActive() map[string]metrics.TargetSet { - s.iscrapersMut.RLock() - defer s.iscrapersMut.RUnlock() - - allTargets := make(map[string]metrics.TargetSet, len(s.iscrapers)) - for instance, is := range s.iscrapers { - allTargets[instance] = is.sm.TargetsActive() - } - return allTargets -} - -// Stop stops the Scraper. -func (s *Scraper) Stop() { - s.iscrapersMut.Lock() - defer s.iscrapersMut.Unlock() - - for instance, is := range s.iscrapers { - is.Stop() - delete(s.iscrapers, instance) - } - - s.cancel() -} - -// instanceScraper is a Scraper which always sends to the same instance. -type instanceScraper struct { - log log.Logger - - sd *discovery.Manager - sm *scrape.Manager - cancel context.CancelFunc - exited chan struct{} -} - -// newInstanceScraper runs a new instanceScraper. Must be stopped by calling -// Stop. 
-func newInstanceScraper( - ctx context.Context, - l log.Logger, - s InstanceStore, - instanceName string, - dialerFunc config_util.DialContextFunc, -) *instanceScraper { - - ctx, cancel := context.WithCancel(ctx) - l = log.With(l, "target_instance", instanceName) - - sdOpts := []func(*discovery.Manager){ - discovery.Name("autoscraper/" + instanceName), - discovery.HTTPClientOptions( - // If dialerFunc is nil, scrape.NewManager will use Go's default dialer. - config_util.WithDialContextFunc(dialerFunc), - ), - } - sd := discovery.NewManager(ctx, l, sdOpts...) - sm := scrape.NewManager(&scrape.Options{ - HTTPClientOptions: []config_util.HTTPClientOption{ - // If dialerFunc is nil, scrape.NewManager will use Go's default dialer. - config_util.WithDialContextFunc(dialerFunc), - }, - }, l, &agentAppender{ - inst: instanceName, - is: s, - }) - - is := &instanceScraper{ - log: l, - - sd: sd, - sm: sm, - cancel: cancel, - exited: make(chan struct{}), - } - - go is.run() - return is -} - -type agentAppender struct { - inst string - is InstanceStore -} - -func (aa *agentAppender) Appender(ctx context.Context) storage.Appender { - mi, err := aa.is.GetInstance(aa.inst) - if err != nil { - return &failedAppender{instanceName: aa.inst} - } - return mi.Appender(ctx) -} - -func (is *instanceScraper) run() { - defer close(is.exited) - var rg run.Group - - rg.Add(func() error { - // Service discovery will stop whenever our parent context is canceled or - // if is.cancel is called. - err := is.sd.Run() - if err != nil { - level.Error(is.log).Log("msg", "autoscrape service discovery exited with error", "err", err) - } - return err - }, func(_ error) { - is.cancel() - }) - - rg.Add(func() error { - err := is.sm.Run(is.sd.SyncCh()) - if err != nil { - level.Error(is.log).Log("msg", "autoscrape scrape manager exited with error", "err", err) - } - return err - }, func(_ error) { - is.sm.Stop() - }) - - _ = rg.Run() -} - -func (is *instanceScraper) ApplyConfig(jobs []*prom_config.ScrapeConfig) error { - var firstError error - saveError := func(e error) { - if firstError == nil && e != nil { - firstError = e - } - } - - var ( - scrapeConfigs = make([]*prom_config.ScrapeConfig, 0, len(jobs)) - sdConfigs = make(map[string]discovery.Configs, len(jobs)) - ) - for _, job := range jobs { - sdConfigs[job.JobName] = job.ServiceDiscoveryConfigs - scrapeConfigs = append(scrapeConfigs, job) - } - if err := is.sd.ApplyConfig(sdConfigs); err != nil { - level.Error(is.log).Log("msg", "error when applying SD to autoscraper", "err", err) - saveError(err) - } - if err := is.sm.ApplyConfig(&prom_config.Config{ScrapeConfigs: scrapeConfigs}); err != nil { - level.Error(is.log).Log("msg", "error when applying jobs to scraper", "err", err) - saveError(err) - } - - return firstError -} - -func (is *instanceScraper) Stop() { - is.cancel() - <-is.exited -} diff --git a/internal/static/integrations/v2/autoscrape/autoscrape_test.go b/internal/static/integrations/v2/autoscrape/autoscrape_test.go deleted file mode 100644 index 9aaa148ecd..0000000000 --- a/internal/static/integrations/v2/autoscrape/autoscrape_test.go +++ /dev/null @@ -1,118 +0,0 @@ -package autoscrape - -import ( - "context" - "net/http/httptest" - "testing" - "time" - - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/prometheus/common/model" - prom_config "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery" - 
"github.com/prometheus/prometheus/model/exemplar" - "github.com/prometheus/prometheus/model/histogram" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/metadata" - "github.com/prometheus/prometheus/storage" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" -) - -// TestAutoscrape is a basic end-to-end test of the autoscraper. -func TestAutoscrape(t *testing.T) { - srv := httptest.NewServer(promhttp.Handler()) - defer srv.Close() - - wt := util.NewWaitTrigger() - - noop := noOpAppender - noop.AppendFunc = func(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - wt.Trigger() - return noOpAppender.AppendFunc(ref, l, t, v) - } - - im := instance.MockManager{ - GetInstanceFunc: func(name string) (instance.ManagedInstance, error) { - assert.Equal(t, t.Name(), name) - return &mockInstance{app: &noop}, nil - }, - } - as := NewScraper(util.TestLogger(t), im, nil) - defer as.Stop() - - err := as.ApplyConfig([]*ScrapeConfig{{ - Instance: t.Name(), - Config: func() prom_config.ScrapeConfig { - cfg := prom_config.DefaultScrapeConfig - cfg.JobName = t.Name() - cfg.ScrapeInterval = model.Duration(time.Second) - cfg.ScrapeTimeout = model.Duration(time.Second / 2) - cfg.ServiceDiscoveryConfigs = discovery.Configs{ - discovery.StaticConfig{{ - Targets: []model.LabelSet{{ - model.AddressLabel: model.LabelValue(srv.Listener.Addr().String()), - }}, - Source: t.Name(), - }}, - } - return cfg - }(), - }}) - require.NoError(t, err, "failed to apply configs") - - // NOTE(rfratto): SD won't start sending targets until after 5 seconds. We'll - // need to at least wait that long. - time.Sleep(5 * time.Second) - - require.NoError(t, wt.Wait(5*time.Second), "timed out waiting for scrape") -} - -var globalRef atomic.Uint64 -var noOpAppender = mockAppender{ - AppendFunc: func(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - return storage.SeriesRef(globalRef.Inc()), nil - }, - CommitFunc: func() error { return nil }, - RollbackFunc: func() error { return nil }, - AppendExemplarFunc: func(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { - return storage.SeriesRef(globalRef.Inc()), nil - }, - AppendHistogramFunc: func(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { - return storage.SeriesRef(globalRef.Inc()), nil - }, -} - -type mockAppender struct { - AppendFunc func(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) - CommitFunc func() error - RollbackFunc func() error - AppendExemplarFunc func(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) - UpdateMetadataFunc func(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) - AppendHistogramFunc func(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) -} - -func (ma *mockAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - return ma.AppendFunc(ref, l, t, v) -} -func (ma *mockAppender) Commit() error { return ma.CommitFunc() } -func (ma *mockAppender) Rollback() error { return ma.RollbackFunc() } -func (ma *mockAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { - return 
ma.AppendExemplarFunc(ref, l, e) -} -func (ma *mockAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { - return ma.UpdateMetadataFunc(ref, l, m) -} -func (ma *mockAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { - return ma.AppendHistogramFunc(ref, l, t, h, fh) -} - -type mockInstance struct { - instance.NoOpInstance - app storage.Appender -} - -func (mi *mockInstance) Appender(ctx context.Context) storage.Appender { return mi.app } diff --git a/internal/static/integrations/v2/controller.go b/internal/static/integrations/v2/controller.go deleted file mode 100644 index b01a666119..0000000000 --- a/internal/static/integrations/v2/controller.go +++ /dev/null @@ -1,444 +0,0 @@ -package integrations - -import ( - "context" - "errors" - "fmt" - "net/http" - "net/url" - "path" - "sort" - "strings" - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/static/integrations/v2/autoscrape" - "github.com/prometheus/prometheus/discovery" - http_sd "github.com/prometheus/prometheus/discovery/http" - "go.uber.org/atomic" -) - -// controllerConfig holds a set of integration configs. -type controllerConfig []Config - -// controller manages a set of integrations. -type controller struct { - logger log.Logger - - mut sync.Mutex - cfg controllerConfig - globals Globals - integrations []*controlledIntegration // Running integrations - - runIntegrations chan []*controlledIntegration // Schedule integrations to run -} - -// newController creates a new Controller. Controller is intended to be -// embedded inside of integrations that may want to multiplex other -// integrations. -func newController(l log.Logger, cfg controllerConfig, globals Globals) (*controller, error) { - c := &controller{ - logger: l, - runIntegrations: make(chan []*controlledIntegration, 1), - } - if err := c.UpdateController(cfg, globals); err != nil { - return nil, err - } - return c, nil -} - -// run starts the controller and blocks until ctx is canceled. -func (c *controller) run(ctx context.Context) { - pool := newWorkerPool(ctx, c.logger) - defer pool.Close() - - for { - select { - case <-ctx.Done(): - level.Debug(c.logger).Log("msg", "controller exiting") - return - case newIntegrations := <-c.runIntegrations: - pool.Reload(newIntegrations) - } - } -} - -// controlledIntegration is a running Integration. A running integration is -// identified uniquely by its id. -type controlledIntegration struct { - id integrationID - i Integration - c Config // Config that generated i. Used for comparing to see if a config changed. - running atomic.Bool -} - -func (ci *controlledIntegration) Running() bool { - return ci.running.Load() -} - -// integrationID uses a tuple of Name and Identifier to uniquely identify an -// integration. -type integrationID struct{ Name, Identifier string } - -func (id integrationID) String() string { - return fmt.Sprintf("%s/%s", id.Name, id.Identifier) -} - -// UpdateController updates the Controller with a new controllerConfig and -// Globals. -// -// UpdateController updates running integrations. Extensions can be -// recalculated by calling relevant methods like Handler or Targets.
-func (c *controller) UpdateController(cfg controllerConfig, globals Globals) error { - c.mut.Lock() - defer c.mut.Unlock() - - // Ensure that no singleton integration is defined twice - var ( - duplicatedSingletons []string - singletonSet = make(map[string]struct{}) - ) - for _, cfg := range cfg { - t, _ := RegisteredType(cfg) - if t != TypeSingleton { - continue - } - - if _, exists := singletonSet[cfg.Name()]; exists { - duplicatedSingletons = append(duplicatedSingletons, cfg.Name()) - continue - } - singletonSet[cfg.Name()] = struct{}{} - } - if len(duplicatedSingletons) == 1 { - return fmt.Errorf("integration %q may only be defined once", duplicatedSingletons[0]) - } else if len(duplicatedSingletons) > 1 { - list := strings.Join(duplicatedSingletons, ", ") - return fmt.Errorf("the following integrations may only be defined once each: %s", list) - } - - integrationIDMap := map[integrationID]struct{}{} - - integrations := make([]*controlledIntegration, 0, len(cfg)) - -NextConfig: - for _, ic := range cfg { - name := ic.Name() - - identifier, err := ic.Identifier(globals) - if err != nil { - return fmt.Errorf("could not build identifier for integration %q: %w", name, err) - } - - if err := ic.ApplyDefaults(globals); err != nil { - return fmt.Errorf("failed to apply defaults for %s/%s: %w", name, identifier, err) - } - - id := integrationID{Name: name, Identifier: identifier} - if _, exist := integrationIDMap[id]; exist { - return fmt.Errorf("multiple instance names %q in integration %q", identifier, name) - } - integrationIDMap[id] = struct{}{} - - // Now that we know the ID for an integration, we can check to see if it's - // running and can be dynamically updated. - for _, ci := range c.integrations { - if ci.id != id { - continue - } - - // If the configs haven't changed, then we don't need to do anything. - if CompareConfigs(ci.c, ic) { - integrations = append(integrations, ci) - continue NextConfig - } - - if ui, ok := ci.i.(UpdateIntegration); ok { - if err := ui.ApplyConfig(ic, globals); errors.Is(err, ErrInvalidUpdate) { - level.Warn(c.logger).Log("msg", "failed to dynamically update integration; will recreate", "integration", name, "instance", identifier, "err", err) - break - } else if err != nil { - return fmt.Errorf("failed to update %s integration %q: %w", name, identifier, err) - } else { - // Update succeeded; re-use the running one and go to the next - // integration to process. - integrations = append(integrations, ci) - continue NextConfig - } - } - - // We found the integration to update: we can stop this loop now. - break - } - - logger := log.With(c.logger, "integration", name, "instance", identifier) - integration, err := ic.NewIntegration(logger, globals) - if err != nil { - return fmt.Errorf("failed to construct %s integration %q: %w", name, identifier, err) - } - - // Create a new controlled integration. - integrations = append(integrations, &controlledIntegration{ - id: id, - i: integration, - c: ic, - }) - } - - // Schedule integrations to run - c.runIntegrations <- integrations - - c.cfg = cfg - c.globals = globals - c.integrations = integrations - return nil -} - -// Handler returns an HTTP handler for the controller and its integrations. -// Handler will pass through requests to other running integrations. Handler -// always returns an http.Handler regardless of error. -// -// Handler is expensive to compute and should only be done after reloading the -// config.
-func (c *controller) Handler(prefix string) (http.Handler, error) { - var firstErr error - saveFirstErr := func(err error) { - if firstErr == nil { - firstErr = err - } - } - - r := mux.NewRouter() - - err := c.forEachIntegration(prefix, func(ci *controlledIntegration, iprefix string) { - id := ci.id - - i, ok := ci.i.(HTTPIntegration) - if !ok { - return - } - - handler, err := i.Handler(iprefix + "/") - if err != nil { - saveFirstErr(fmt.Errorf("could not generate HTTP handler for %s integration %q: %w", id.Name, id.Identifier, err)) - return - } else if handler == nil { - return - } - - // Anything that matches the integrationPrefix should be passed to the handler. - // The two registrations below are separate because if one instance name is a prefix of another - // (e.g. localhost and localhost2), localhost2 would never get called since localhost always takes precedence. - // Appending / fixes this, but to keep old behavior we need to ensure /localhost and localhost2 also work, hence - // the second HandleFunc below this one. https://github.com/grafana/agent/issues/1718 - hfunc := func(rw http.ResponseWriter, r *http.Request) { - if !ci.Running() { - http.Error(rw, fmt.Sprintf("%s integration instance %q not running", id.Name, id.Identifier), http.StatusServiceUnavailable) - return - } - handler.ServeHTTP(rw, r) - } - r.PathPrefix(iprefix + "/").HandlerFunc(hfunc) - // Handle calling the iprefix itself - r.HandleFunc(iprefix, hfunc) - }) - if err != nil { - level.Warn(c.logger).Log("msg", "error when iterating over integrations to build HTTP handlers", "err", err) - } - - // TODO(rfratto): navigation page for exact prefix match - - return r, firstErr -} - -// forEachIntegration calculates the prefix for each integration and calls f. -// prefix will not end in /. -func (c *controller) forEachIntegration(basePrefix string, f func(ci *controlledIntegration, iprefix string)) error { - c.mut.Lock() - defer c.mut.Unlock() - - // Pre-populate a mapping of integration name -> identifier. If there are - // two instances of the same integration, we want to ensure unique routing. - // - // This special logic is done for backwards compatibility with the original - // design of integrations. - identifiersMap := map[string][]string{} - for _, i := range c.integrations { - identifiersMap[i.id.Name] = append(identifiersMap[i.id.Name], i.id.Identifier) - } - - usedPrefixes := map[string]struct{}{} - - for _, ci := range c.integrations { - id := ci.id - multipleInstances := len(identifiersMap[id.Name]) > 1 - - var integrationPrefix string - if multipleInstances { - // i.e., /integrations/mysqld_exporter/server-a - integrationPrefix = path.Join(basePrefix, id.Name, id.Identifier) - } else { - // i.e., /integrations/node_exporter - integrationPrefix = path.Join(basePrefix, id.Name) - } - - f(ci, integrationPrefix) - - if _, exist := usedPrefixes[integrationPrefix]; exist { - return fmt.Errorf("BUG: duplicate integration prefix %q", integrationPrefix) - } - usedPrefixes[integrationPrefix] = struct{}{} - } - return nil -} - -// Targets returns the current set of targets across all integrations. Use opts -// to customize which targets are returned. -func (c *controller) Targets(ep Endpoint, opts TargetOptions) []*targetGroup { - // Grab the integrations as fast as possible. We don't want to spend too much - // time holding the mutex.
- type prefixedMetricsIntegration struct { - id integrationID - i MetricsIntegration - ep Endpoint - } - var mm []prefixedMetricsIntegration - - err := c.forEachIntegration(ep.Prefix, func(ci *controlledIntegration, iprefix string) { - // Best effort liveness check. They might stop running when we request - // their targets, which is fine, but we should save as much work as we - // can. - if !ci.Running() { - return - } - if mi, ok := ci.i.(MetricsIntegration); ok { - ep := Endpoint{Host: ep.Host, Prefix: iprefix} - mm = append(mm, prefixedMetricsIntegration{id: ci.id, i: mi, ep: ep}) - } - }) - if err != nil { - level.Warn(c.logger).Log("msg", "error when iterating over integrations to get targets", "err", err) - } - - var tgs []*targetGroup - for _, mi := range mm { - // If we're looking for a subset of integrations, filter out anything that doesn't match. - if len(opts.Integrations) > 0 && !stringSliceContains(opts.Integrations, mi.id.Name) { - continue - } - // If we're looking for a specific instance, filter out anything that doesn't match. - if opts.Instance != "" && mi.id.Identifier != opts.Instance { - continue - } - - for _, tgt := range mi.i.Targets(mi.ep) { - tgs = append(tgs, (*targetGroup)(tgt)) - } - } - sort.Slice(tgs, func(i, j int) bool { - return tgs[i].Source < tgs[j].Source - }) - return tgs -} - -func stringSliceContains(ss []string, s string) bool { - for _, check := range ss { - if check == s { - return true - } - } - return false -} - -// TargetOptions controls which targets should be returned by the subsystem. -type TargetOptions struct { - // Integrations is the set of integrations to return. An empty slice will - // default to returning all integrations. - Integrations []string - // Instance matches a specific instance from all integrations. An empty - // string will match any instance. - Instance string -} - -// TargetOptionsFromParams creates TargetOptions from parsed URL query parameters. -func TargetOptionsFromParams(u url.Values) (TargetOptions, error) { - var to TargetOptions - - rawIntegrations := u.Get("integrations") - if rawIntegrations != "" { - rawIntegrations, err := url.QueryUnescape(rawIntegrations) - if err != nil { - return to, fmt.Errorf("invalid value for integrations: %w", err) - } - to.Integrations = strings.Split(rawIntegrations, ",") - } - - rawInstance := u.Get("instance") - if rawInstance != "" { - rawInstance, err := url.QueryUnescape(rawInstance) - if err != nil { - return to, fmt.Errorf("invalid value for instance: %w", err) - } - to.Instance = rawInstance - } - - return to, nil -} - -// ToParams will convert to into URL query parameters. -func (to TargetOptions) ToParams() url.Values { - p := make(url.Values) - if len(to.Integrations) != 0 { - p.Set("integrations", url.QueryEscape(strings.Join(to.Integrations, ","))) - } - if to.Instance != "" { - p.Set("instance", url.QueryEscape(to.Instance)) - } - return p -} - -// ScrapeConfigs returns a set of scrape configs to use for self-scraping. -// sdConfig should contain the full URL where the integrations SD API is -// exposed. ScrapeConfigs will inject unique query parameters per integration -// to limit what will be discovered. -func (c *controller) ScrapeConfigs(prefix string, sdConfig *http_sd.SDConfig) []*autoscrape.ScrapeConfig { - // Grab the integrations as fast as possible. We don't want to spend too much - // time holding the mutex. 
- type prefixedMetricsIntegration struct { - id integrationID - i MetricsIntegration - prefix string - } - var mm []prefixedMetricsIntegration - - err := c.forEachIntegration(prefix, func(ci *controlledIntegration, iprefix string) { - if mi, ok := ci.i.(MetricsIntegration); ok { - mm = append(mm, prefixedMetricsIntegration{id: ci.id, i: mi, prefix: iprefix}) - } - }) - if err != nil { - level.Warn(c.logger).Log("msg", "error when iterating over integrations to get scrape configs", "err", err) - } - - var cfgs []*autoscrape.ScrapeConfig - for _, mi := range mm { - // sdConfig will be pointing to the targets API. By default, this returns absolutely everything. - // We want to use the query parameters to inform the API to only return - // specific targets. - opts := TargetOptions{ - Integrations: []string{mi.id.Name}, - Instance: mi.id.Identifier, - } - - integrationSDConfig := *sdConfig - integrationSDConfig.URL = sdConfig.URL + "?" + opts.ToParams().Encode() - sds := discovery.Configs{&integrationSDConfig} - cfgs = append(cfgs, mi.i.ScrapeConfigs(sds)...) - } - sort.Slice(cfgs, func(i, j int) bool { - return cfgs[i].Config.JobName < cfgs[j].Config.JobName - }) - return cfgs -} diff --git a/internal/static/integrations/v2/controller_httpintegration_test.go b/internal/static/integrations/v2/controller_httpintegration_test.go deleted file mode 100644 index ee817a1c53..0000000000 --- a/internal/static/integrations/v2/controller_httpintegration_test.go +++ /dev/null @@ -1,259 +0,0 @@ -package integrations - -import ( - "fmt" - "io" - "net/http" - "net/http/httptest" - "strings" - "testing" - - "github.com/go-kit/log" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/util" - "github.com/stretchr/testify/require" -) - -// -// Tests for controller's utilization of the HTTPIntegration interface. -// - -func Test_controller_Handler_Sync(t *testing.T) { - httpConfigFromID := func(t *testing.T, name, identifier string) Config { - t.Helper() - - cfg := mockConfigNameTuple(t, name, identifier) - cfg.NewIntegrationFunc = func(log.Logger, Globals) (Integration, error) { - i := mockHTTPIntegration{ - Integration: NoOpIntegration, - HandlerFunc: func(prefix string) (http.Handler, error) { - return http.HandlerFunc(func(rw http.ResponseWriter, _ *http.Request) { - // We should never reach here since we don't run the integrations. - rw.WriteHeader(http.StatusBadRequest) - }), nil - }, - } - return i, nil - } - - return cfg - } - - cfg := controllerConfig{httpConfigFromID(t, "foo", "bar")} - ctrl, err := newController(util.TestLogger(t), cfg, Globals{}) - require.NoError(t, err) - - handler, err := ctrl.Handler("/integrations/") - require.NoError(t, err) - - srv := httptest.NewServer(handler) - - resp, err := srv.Client().Get(srv.URL + "/integrations/foo/bar") - require.NoError(t, err) - require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) -} - -// Test_controller_HTTPIntegration_Prefixes ensures that the controller will assign -// appropriate prefixes to HTTPIntegrations. 
-func Test_controller_HTTPIntegration_Prefixes(t *testing.T) { - httpConfigFromID := func(t *testing.T, prefixes *[]string, name, identifier string) Config { - t.Helper() - - cfg := mockConfigNameTuple(t, name, identifier) - cfg.NewIntegrationFunc = func(log.Logger, Globals) (Integration, error) { - i := mockHTTPIntegration{ - Integration: NoOpIntegration, - HandlerFunc: func(prefix string) (http.Handler, error) { - *prefixes = append(*prefixes, prefix) - return http.NotFoundHandler(), nil - }, - } - return i, nil - } - - return cfg - } - - t.Run("fully unique", func(t *testing.T) { - var prefixes []string - - ctrl, err := newController( - util.TestLogger(t), - controllerConfig{ - httpConfigFromID(t, &prefixes, "foo", "bar"), - httpConfigFromID(t, &prefixes, "fizz", "buzz"), - httpConfigFromID(t, &prefixes, "hello", "world"), - }, - Globals{}, - ) - require.NoError(t, err) - _ = newSyncController(t, ctrl) - - _, err = ctrl.Handler("/integrations/") - require.NoError(t, err) - - expect := []string{ - "/integrations/foo/", - "/integrations/fizz/", - "/integrations/hello/", - } - require.ElementsMatch(t, prefixes, expect) - }) - - t.Run("multiple instances", func(t *testing.T) { - var prefixes []string - - ctrl, err := newController( - util.TestLogger(t), - controllerConfig{ - httpConfigFromID(t, &prefixes, "foo", "bar"), - httpConfigFromID(t, &prefixes, "foo", "buzz"), - httpConfigFromID(t, &prefixes, "hello", "world"), - }, - Globals{}, - ) - require.NoError(t, err) - _ = newSyncController(t, ctrl) - - _, err = ctrl.Handler("/integrations/") - require.NoError(t, err) - - expect := []string{ - "/integrations/foo/bar/", - "/integrations/foo/buzz/", - "/integrations/hello/", - } - require.ElementsMatch(t, prefixes, expect) - }) -} - -// Test_controller_HTTPIntegration_Routing ensures that the controller will route -// requests to the appropriate integration. 
-func Test_controller_HTTPIntegration_Routing(t *testing.T) { - httpConfigFromID := func(t *testing.T, name, identifier string) Config { - t.Helper() - - cfg := mockConfigNameTuple(t, name, identifier) - cfg.NewIntegrationFunc = func(log.Logger, Globals) (Integration, error) { - i := mockHTTPIntegration{ - Integration: NoOpIntegration, - HandlerFunc: func(prefix string) (http.Handler, error) { - return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { - fmt.Fprintf(rw, "prefix=%s, path=%s", prefix, r.URL.Path) - }), nil - }, - } - return i, nil - } - - return cfg - } - - ctrl, err := newController( - util.TestLogger(t), - controllerConfig{ - httpConfigFromID(t, "foo", "bar"), - httpConfigFromID(t, "foo", "buzz"), - httpConfigFromID(t, "hello", "world"), - }, - Globals{}, - ) - require.NoError(t, err) - _ = newSyncController(t, ctrl) - - handler, err := ctrl.Handler("/integrations/") - require.NoError(t, err) - - srv := httptest.NewServer(handler) - - getResponse := func(t *testing.T, path string) string { - t.Helper() - resp, err := srv.Client().Get(srv.URL + path) - require.NoError(t, err) - defer resp.Body.Close() - - var sb strings.Builder - _, err = io.Copy(&sb, resp.Body) - require.NoError(t, err) - return sb.String() - } - - tt := []struct { - path, expect string - }{ - {"/integrations/foo/bar", "prefix=/integrations/foo/bar/, path=/integrations/foo/bar"}, - {"/integrations/foo/bar/", "prefix=/integrations/foo/bar/, path=/integrations/foo/bar/"}, - {"/integrations/foo/bar/extra", "prefix=/integrations/foo/bar/, path=/integrations/foo/bar/extra"}, - } - - for _, tc := range tt { - require.Equal(t, tc.expect, getResponse(t, tc.path)) - } -} - -// Test_controller_HTTPIntegration_NestedRouting ensures that the controller -// will work with nested routers. 
-func Test_controller_HTTPIntegration_NestedRouting(t *testing.T) { - cfg := mockConfigNameTuple(t, "test", "test") - cfg.NewIntegrationFunc = func(log.Logger, Globals) (Integration, error) { - i := mockHTTPIntegration{ - Integration: NoOpIntegration, - HandlerFunc: func(prefix string) (http.Handler, error) { - r := mux.NewRouter() - r.StrictSlash(true) - - r.HandleFunc(prefix, func(rw http.ResponseWriter, r *http.Request) { - fmt.Fprintf(rw, "prefix=%s, path=%s", prefix, r.URL.Path) - }) - - r.HandleFunc(prefix+"greet", func(rw http.ResponseWriter, _ *http.Request) { - fmt.Fprintf(rw, "Hello, world!") - }) - return r, nil - }, - } - return i, nil - } - - ctrl, err := newController(util.TestLogger(t), controllerConfig{cfg}, Globals{}) - require.NoError(t, err) - _ = newSyncController(t, ctrl) - - handler, err := ctrl.Handler("/integrations/") - require.NoError(t, err) - - srv := httptest.NewServer(handler) - - getResponse := func(t *testing.T, path string) string { - t.Helper() - resp, err := srv.Client().Get(srv.URL + path) - require.NoError(t, err) - defer resp.Body.Close() - - var sb strings.Builder - _, err = io.Copy(&sb, resp.Body) - require.NoError(t, err) - return sb.String() - } - - tt := []struct { - path, expect string - }{ - {"/integrations/test", "prefix=/integrations/test/, path=/integrations/test/"}, - {"/integrations/test/", "prefix=/integrations/test/, path=/integrations/test/"}, - {"/integrations/test/greet", "Hello, world!"}, - } - - for _, tc := range tt { - require.Equal(t, tc.expect, getResponse(t, tc.path)) - } -} - -type mockHTTPIntegration struct { - Integration - HandlerFunc func(prefix string) (http.Handler, error) -} - -func (m mockHTTPIntegration) Handler(prefix string) (http.Handler, error) { - return m.HandlerFunc(prefix) -} diff --git a/internal/static/integrations/v2/controller_metricsintegration_test.go b/internal/static/integrations/v2/controller_metricsintegration_test.go deleted file mode 100644 index 67222e61fe..0000000000 --- a/internal/static/integrations/v2/controller_metricsintegration_test.go +++ /dev/null @@ -1,184 +0,0 @@ -package integrations - -import ( - "context" - nethttp "net/http" - "testing" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/static/integrations/v2/autoscrape" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/common/model" - prom_config "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/http" - "github.com/prometheus/prometheus/discovery/targetgroup" - "github.com/stretchr/testify/require" -) - -// -// Tests for controller's utilization of the MetricsIntegration interface. 
-// - -func Test_controller_MetricsIntegration_Targets(t *testing.T) { - integrationWithTarget := func(targetName string) Integration { - return mockMetricsIntegration{ - HTTPIntegration: newWaitStartedIntegration(), - TargetsFunc: func(Endpoint) []*targetgroup.Group { - return []*targetgroup.Group{{ - Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(targetName)}}, - }} - }, - ScrapeConfigsFunc: func(c discovery.Configs) []*autoscrape.ScrapeConfig { return nil }, - } - } - - integrations := []Config{ - mockConfigNameTuple(t, "a", "instanceA").WithNewIntegrationFunc(func(l log.Logger, g Globals) (Integration, error) { - return integrationWithTarget("a"), nil - }), - mockConfigNameTuple(t, "b", "instanceB").WithNewIntegrationFunc(func(l log.Logger, g Globals) (Integration, error) { - return integrationWithTarget("b"), nil - }), - } - - // waitIntegrations starts a controller and waits for all of its integrations - // to run. - waitIntegrations := func(t *testing.T, ctrl *controller) { - t.Helper() - _ = newSyncController(t, ctrl) - err := ctrl.forEachIntegration("/", func(ci *controlledIntegration, _ string) { - wsi := ci.i.(mockMetricsIntegration).HTTPIntegration.(*waitStartedIntegration) - _ = wsi.trigger.WaitContext(context.Background()) - }) - require.NoError(t, err) - } - - t.Run("All", func(t *testing.T) { - ctrl, err := newController( - util.TestLogger(t), - controllerConfig(integrations), - Globals{}, - ) - require.NoError(t, err) - waitIntegrations(t, ctrl) - - result := ctrl.Targets(Endpoint{Prefix: "/"}, TargetOptions{}) - expect := []*targetGroup{ - {Targets: []model.LabelSet{{model.AddressLabel: "a"}}}, - {Targets: []model.LabelSet{{model.AddressLabel: "b"}}}, - } - require.Equal(t, expect, result) - }) - - t.Run("All by Integration", func(t *testing.T) { - ctrl, err := newController( - util.TestLogger(t), - controllerConfig(integrations), - Globals{}, - ) - require.NoError(t, err) - waitIntegrations(t, ctrl) - - result := ctrl.Targets(Endpoint{Prefix: "/"}, TargetOptions{ - Integrations: []string{"a", "b"}, - }) - expect := []*targetGroup{ - {Targets: []model.LabelSet{{model.AddressLabel: "a"}}}, - {Targets: []model.LabelSet{{model.AddressLabel: "b"}}}, - } - require.Equal(t, expect, result) - }) - - t.Run("Specific Integration", func(t *testing.T) { - ctrl, err := newController( - util.TestLogger(t), - controllerConfig(integrations), - Globals{}, - ) - require.NoError(t, err) - waitIntegrations(t, ctrl) - - result := ctrl.Targets(Endpoint{Prefix: "/"}, TargetOptions{ - Integrations: []string{"a"}, - }) - expect := []*targetGroup{ - {Targets: []model.LabelSet{{model.AddressLabel: "a"}}}, - } - require.Equal(t, expect, result) - }) -} - -func Test_controller_MetricsIntegration_ScrapeConfig(t *testing.T) { - integrationWithTarget := func(targetName string) Integration { - return mockMetricsIntegration{ - HTTPIntegration: NoOpIntegration, - ScrapeConfigsFunc: func(c discovery.Configs) []*autoscrape.ScrapeConfig { - return []*autoscrape.ScrapeConfig{{ - Instance: "default", - Config: prom_config.ScrapeConfig{JobName: targetName}, - }} - }, - } - } - - integrations := []Config{ - mockConfigNameTuple(t, "a", "instanceA").WithNewIntegrationFunc(func(l log.Logger, g Globals) (Integration, error) { - return integrationWithTarget("a"), nil - }), - mockConfigNameTuple(t, "b", "instanceB").WithNewIntegrationFunc(func(l log.Logger, g Globals) (Integration, error) { - return integrationWithTarget("b"), nil - }), - } - - ctrl, err := newController( - util.TestLogger(t), - 
controllerConfig(integrations), - Globals{}, - ) - require.NoError(t, err) - _ = newSyncController(t, ctrl) - - result := ctrl.ScrapeConfigs("/", &http.DefaultSDConfig) - expect := []*autoscrape.ScrapeConfig{ - {Instance: "default", Config: prom_config.ScrapeConfig{JobName: "a"}}, - {Instance: "default", Config: prom_config.ScrapeConfig{JobName: "b"}}, - } - require.Equal(t, expect, result) -} - -// -// Tests for controller's utilization of the MetricsIntegration interface. -// - -type waitStartedIntegration struct { - trigger *util.WaitTrigger -} - -func newWaitStartedIntegration() *waitStartedIntegration { - return &waitStartedIntegration{trigger: util.NewWaitTrigger()} -} - -func (i *waitStartedIntegration) RunIntegration(ctx context.Context) error { - i.trigger.Trigger() - <-ctx.Done() - return nil -} - -func (i *waitStartedIntegration) Handler(prefix string) (nethttp.Handler, error) { - return nil, nil -} - -type mockMetricsIntegration struct { - HTTPIntegration - TargetsFunc func(ep Endpoint) []*targetgroup.Group - ScrapeConfigsFunc func(discovery.Configs) []*autoscrape.ScrapeConfig -} - -func (m mockMetricsIntegration) Targets(ep Endpoint) []*targetgroup.Group { - return m.TargetsFunc(ep) -} - -func (m mockMetricsIntegration) ScrapeConfigs(cfgs discovery.Configs) []*autoscrape.ScrapeConfig { - return m.ScrapeConfigsFunc(cfgs) -} diff --git a/internal/static/integrations/v2/controller_test.go b/internal/static/integrations/v2/controller_test.go deleted file mode 100644 index dcae71c56f..0000000000 --- a/internal/static/integrations/v2/controller_test.go +++ /dev/null @@ -1,286 +0,0 @@ -package integrations - -import ( - "context" - "strings" - "sync" - "testing" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/util" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" -) - -// -// Tests for Controller's utilization of the core Integration interface. -// - -// Test_controller_UniqueIdentifier ensures that integrations must not share a (name, id) tuple. -func Test_controller_UniqueIdentifier(t *testing.T) { - controllerFromConfigs := func(t *testing.T, cc []Config) (*controller, error) { - t.Helper() - return newController(util.TestLogger(t), controllerConfig(cc), Globals{}) - } - - t.Run("different name, identifier", func(t *testing.T) { - _, err := controllerFromConfigs(t, []Config{ - mockConfigNameTuple(t, "foo", "bar"), - mockConfigNameTuple(t, "fizz", "buzz"), - }) - require.NoError(t, err) - }) - - t.Run("same name, different identifier", func(t *testing.T) { - _, err := controllerFromConfigs(t, []Config{ - mockConfigNameTuple(t, "foo", "bar"), - mockConfigNameTuple(t, "foo", "buzz"), - }) - require.NoError(t, err) - }) - - t.Run("same name, same identifier", func(t *testing.T) { - _, err := controllerFromConfigs(t, []Config{ - mockConfigNameTuple(t, "foo", "bar"), - mockConfigNameTuple(t, "foo", "bar"), - }) - require.Error(t, err, `multiple instance names "bar" in integration "foo"`) - }) -} - -// Test_controller_RunsIntegration ensures that integrations -// run. -func Test_controller_RunsIntegration(t *testing.T) { - var wg sync.WaitGroup - wg.Add(1) - - ctx, cancel := context.WithCancel(context.Background()) - - ctrl, err := newController( - util.TestLogger(t), - controllerConfig{ - mockConfigForIntegration(t, FuncIntegration(func(ctx context.Context) error { - defer wg.Done() - cancel() - <-ctx.Done() - return nil - })), - }, - Globals{}, - ) - require.NoError(t, err, "failed to create controller") - - // Run the controller. 
The controller should immediately run our fake integration - // which will cancel ctx and cause ctrl to exit. - ctrl.run(ctx) - - // Make sure that our integration exited too. - wg.Wait() -} - -// Test_controller_ConfigChanges ensures that integrations only get restarted -// when configs are no longer equal. -func Test_controller_ConfigChanges(t *testing.T) { - tc := func(t *testing.T, changed bool) (timesRan uint64) { - t.Helper() - - var integrationsWg sync.WaitGroup - var starts atomic.Uint64 - - mockIntegration := FuncIntegration(func(ctx context.Context) error { - integrationsWg.Done() - starts.Inc() - <-ctx.Done() - return nil - }) - - cfg := controllerConfig{ - mockConfig{ - NameFunc: func() string { return mockIntegrationName }, - ConfigEqualsFunc: func(Config) bool { return !changed }, - ApplyDefaultsFunc: func(g Globals) error { return nil }, - IdentifierFunc: func(Globals) (string, error) { - return mockIntegrationName, nil - }, - NewIntegrationFunc: func(log.Logger, Globals) (Integration, error) { - integrationsWg.Add(1) - return mockIntegration, nil - }, - }, - } - - globals := Globals{} - ctrl, err := newController(util.TestLogger(t), cfg, globals) - require.NoError(t, err, "failed to create controller") - - sc := newSyncController(t, ctrl) - require.NoError(t, sc.UpdateController(cfg, globals), "failed to re-apply config") - - // Wait for our integrations to have been started - integrationsWg.Wait() - - sc.Stop() - return starts.Load() - } - - t.Run("Unchanged", func(t *testing.T) { - starts := tc(t, false) - require.Equal(t, uint64(1), starts, "integration should only have started exactly once") - }) - - t.Run("Changed", func(t *testing.T) { - starts := tc(t, true) - require.Equal(t, uint64(2), starts, "integration should have started exactly twice") - }) -} - -func Test_controller_SingletonCheck(t *testing.T) { - var integrationsWg sync.WaitGroup - var starts atomic.Uint64 - - mockIntegration := FuncIntegration(func(ctx context.Context) error { - integrationsWg.Done() - starts.Inc() - <-ctx.Done() - return nil - }) - c1 := mockConfig{ - NameFunc: func() string { return mockIntegrationName }, - ConfigEqualsFunc: func(Config) bool { return true }, - ApplyDefaultsFunc: func(g Globals) error { return nil }, - IdentifierFunc: func(Globals) (string, error) { - return mockIntegrationName, nil - }, - NewIntegrationFunc: func(log.Logger, Globals) (Integration, error) { - integrationsWg.Add(1) - return mockIntegration, nil - }, - } - configMap := make(map[Config]Type) - configMap[&c1] = TypeSingleton - setRegistered(t, configMap) - cfg := controllerConfig{ - c1, - c1, - } - - globals := Globals{} - _, err := newController(util.TestLogger(t), cfg, globals) - require.Error(t, err) - require.True(t, strings.Contains(err.Error(), `integration "mock" may only be defined once`)) -} - -type syncController struct { - inner *controller - pool *workerPool -} - -// newSyncController pairs an unstarted controller with a manually managed -// worker pool to synchronously apply integrations. -func newSyncController(t *testing.T, inner *controller) *syncController { - t.Helper() - - sc := &syncController{ - inner: inner, - pool: newWorkerPool(context.Background(), inner.logger), - } - - // There's always immediately one queued integration set from any - // successfully created controller. 
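[Editor's note: the syncController above works because controller updates are queued on a channel (runIntegrations) rather than applied in place, so a test can receive exactly one pending set and apply it deterministically. A sketch of that hand-off, reduced to hypothetical names (ctrl, pending, refresh):]

    package main

    import "fmt"

    // ctrl mimics the controller core: updates are queued on a channel
    // instead of being applied directly. Names here are illustrative.
    type ctrl struct {
        pending chan []string // queued sets of integration IDs
        running []string
    }

    func (c *ctrl) update(ids []string) { c.pending <- ids }

    // refresh applies exactly one queued set, mirroring how the deleted
    // syncController drained runIntegrations to keep tests deterministic.
    func (c *ctrl) refresh() {
        c.running = <-c.pending
    }

    func main() {
        c := &ctrl{pending: make(chan []string, 1)}
        c.update([]string{"node_exporter", "redis"})
        c.refresh()
        fmt.Println(c.running) // [node_exporter redis]
    }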
- sc.refresh() - return sc -} - -func (sc *syncController) refresh() { - sc.inner.mut.Lock() - defer sc.inner.mut.Unlock() - - newIntegrations := <-sc.inner.runIntegrations - sc.pool.Reload(newIntegrations) - sc.inner.integrations = newIntegrations -} - -func (sc *syncController) UpdateController(c controllerConfig, g Globals) error { - err := sc.inner.UpdateController(c, g) - if err != nil { - return err - } - sc.refresh() - return nil -} - -func (sc *syncController) Stop() { - sc.pool.Close() -} - -const mockIntegrationName = "mock" - -type mockConfig struct { - NameFunc func() string - ApplyDefaultsFunc func(Globals) error - ConfigEqualsFunc func(Config) bool - IdentifierFunc func(Globals) (string, error) - NewIntegrationFunc func(log.Logger, Globals) (Integration, error) -} - -func (mc mockConfig) Name() string { - return mc.NameFunc() -} - -func (mc mockConfig) ConfigEquals(c Config) bool { - if mc.ConfigEqualsFunc != nil { - return mc.ConfigEqualsFunc(c) - } - return false -} - -func (mc mockConfig) ApplyDefaults(g Globals) error { - return mc.ApplyDefaultsFunc(g) -} - -func (mc mockConfig) Identifier(g Globals) (string, error) { - return mc.IdentifierFunc(g) -} - -func (mc mockConfig) NewIntegration(l log.Logger, g Globals) (Integration, error) { - return mc.NewIntegrationFunc(l, g) -} - -func (mc mockConfig) WithNewIntegrationFunc(f func(log.Logger, Globals) (Integration, error)) mockConfig { - return mockConfig{ - NameFunc: mc.NameFunc, - ApplyDefaultsFunc: mc.ApplyDefaultsFunc, - ConfigEqualsFunc: mc.ConfigEqualsFunc, - IdentifierFunc: mc.IdentifierFunc, - NewIntegrationFunc: f, - } -} - -func mockConfigNameTuple(t *testing.T, name, id string) mockConfig { - t.Helper() - - return mockConfig{ - NameFunc: func() string { return name }, - IdentifierFunc: func(_ Globals) (string, error) { return id, nil }, - ApplyDefaultsFunc: func(g Globals) error { return nil }, - NewIntegrationFunc: func(log.Logger, Globals) (Integration, error) { - return NoOpIntegration, nil - }, - } -} - -// mockConfigForIntegration returns a Config that will always return i. -func mockConfigForIntegration(t *testing.T, i Integration) mockConfig { - t.Helper() - - return mockConfig{ - NameFunc: func() string { return mockIntegrationName }, - ApplyDefaultsFunc: func(g Globals) error { return nil }, - IdentifierFunc: func(Globals) (string, error) { - return mockIntegrationName, nil - }, - NewIntegrationFunc: func(log.Logger, Globals) (Integration, error) { - return i, nil - }, - } -} diff --git a/internal/static/integrations/v2/controller_updateintegration_test.go b/internal/static/integrations/v2/controller_updateintegration_test.go deleted file mode 100644 index 4f0940ed6d..0000000000 --- a/internal/static/integrations/v2/controller_updateintegration_test.go +++ /dev/null @@ -1,79 +0,0 @@ -package integrations - -import ( - "context" - "sync" - "testing" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/util" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" -) - -// -// Tests for controller's utilization of the UpdateIntegration interface. -// - -// Test_controller_UpdateIntegration ensures that the controller will call -// UpdateIntegration for integrations that support it. 
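[Editor's note: the test below relies on UpdateIntegration being an optional capability: the controller type-asserts a running integration and only falls back to a restart when an in-place ApplyConfig is unavailable. A sketch of the general idiom; Integration, ConfigUpdater, and reconfigure are hypothetical stand-ins, not the original interfaces.]

    package main

    import "fmt"

    type Integration interface{ Run() }

    // ConfigUpdater is an optional capability: integrations implementing it
    // can be reconfigured in place instead of being restarted.
    type ConfigUpdater interface {
        ApplyConfig(cfg string) error
    }

    type static struct{}

    func (static) Run() {}

    type dynamic struct{}

    func (dynamic) Run() {}

    func (dynamic) ApplyConfig(cfg string) error {
        fmt.Println("applied", cfg)
        return nil
    }

    // reconfigure updates in place when supported, otherwise reports that a
    // restart is needed, mirroring the behavior the deleted test asserts.
    func reconfigure(i Integration, cfg string) {
        if u, ok := i.(ConfigUpdater); ok {
            _ = u.ApplyConfig(cfg)
            return
        }
        fmt.Println("restart required")
    }

    func main() {
        reconfigure(static{}, "a")  // restart required
        reconfigure(dynamic{}, "b") // applied b
    }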
-func Test_controller_UpdateIntegration(t *testing.T) { - var ( - integrationStartWg sync.WaitGroup - applies, starts atomic.Uint64 - ) - - mockIntegration := mockUpdateIntegration{ - Integration: FuncIntegration(func(ctx context.Context) error { - starts.Inc() - integrationStartWg.Done() - <-ctx.Done() - return nil - }), - ApplyConfigFunc: func(Config, Globals) error { - applies.Inc() - return nil - }, - } - - cfg := controllerConfig{ - mockConfig{ - NameFunc: func() string { return mockIntegrationName }, - ConfigEqualsFunc: func(Config) bool { return false }, - ApplyDefaultsFunc: func(g Globals) error { return nil }, - IdentifierFunc: func(Globals) (string, error) { - return mockIntegrationName, nil - }, - NewIntegrationFunc: func(log.Logger, Globals) (Integration, error) { - integrationStartWg.Add(1) - return mockIntegration, nil - }, - }, - } - - ctrl, err := newController(util.TestLogger(t), cfg, Globals{}) - require.NoError(t, err, "failed to create controller") - - sc := newSyncController(t, ctrl) - - // Wait for our integration to start. - integrationStartWg.Wait() - - // Try to apply again. - require.NoError(t, sc.UpdateController(cfg, ctrl.globals), "failed to re-apply config") - integrationStartWg.Wait() - - sc.Stop() - - require.Equal(t, uint64(1), applies.Load(), "dynamic reload should have occurred") - require.Equal(t, uint64(1), starts.Load(), "restart should not have occurred") -} - -type mockUpdateIntegration struct { - Integration - ApplyConfigFunc func(Config, Globals) error -} - -func (m mockUpdateIntegration) ApplyConfig(c Config, g Globals) error { - return m.ApplyConfigFunc(c, g) -} diff --git a/internal/static/integrations/v2/eventhandler/eventhandler.go b/internal/static/integrations/v2/eventhandler/eventhandler.go deleted file mode 100644 index efb94e206d..0000000000 --- a/internal/static/integrations/v2/eventhandler/eventhandler.go +++ /dev/null @@ -1,472 +0,0 @@ -// Package eventhandler watches for Kubernetes Event objects and hands them off to -// Agent's Logs subsystem (embedded promtail) -package eventhandler - -import ( - "context" - "encoding/json" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - "time" - - v1 "k8s.io/api/core/v1" - "k8s.io/client-go/informers" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" - "k8s.io/client-go/tools/cache" - "k8s.io/client-go/tools/clientcmd" - "k8s.io/client-go/util/homedir" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/integrations/v2" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/labels" -) - -const ( - cacheFileMode = 0600 - logFormatJson = "json" - logFormatFmt = "logfmt" -) - -// EventHandler watches for Kubernetes Event objects and hands them off to -// Agent's logs subsystem (embedded promtail). -type EventHandler struct { - LogsClient *logs.Logs - LogsInstance string - Log log.Logger - CachePath string - LastEvent *ShippedEvents - InitEvent *ShippedEvents - EventInformer cache.SharedIndexInformer - SendTimeout time.Duration - ticker *time.Ticker - instance string - extraLabels labels.Labels - logFormat string - sync.Mutex -} - -// ShippedEvents stores a timestamp and map of event ResourceVersions shipped for that timestamp. -// Used to avoid double-shipping events upon restart. 
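[Editor's note: the cache described above, and defined just below, is one timestamp plus the set of resource versions already shipped at that timestamp, flushed with a write-temp-then-rename so a crash never leaves a torn cache file. A minimal reproduction; shippedEvents and writeCache are hypothetical names, but the JSON shape matches the deleted testdata and the flush pattern matches writeOutLastEvent.]

    package main

    import (
        "encoding/json"
        "fmt"
        "os"
        "path/filepath"
        "time"
    )

    // shippedEvents mirrors the shape described above: a timestamp plus the
    // set of resource versions already shipped at that timestamp.
    type shippedEvents struct {
        Timestamp time.Time           `json:"ts"`
        RvMap     map[string]struct{} `json:"resourceVersion"`
    }

    // writeCache persists the cache atomically: write to a temp file, then
    // rename over the destination, the same flush pattern the deleted
    // writeOutLastEvent used.
    func writeCache(path string, ev *shippedEvents) error {
        buf, err := json.Marshal(ev)
        if err != nil {
            return err
        }
        tmp := path + "-new"
        if err := os.WriteFile(tmp, buf, 0o600); err != nil {
            return err
        }
        return os.Rename(tmp, path)
    }

    func main() {
        path := filepath.Join(os.TempDir(), "eventhandler.cache")
        ev := &shippedEvents{
            Timestamp: time.Now(),
            RvMap:     map[string]struct{}{"58588": {}},
        }
        if err := writeCache(path, ev); err != nil {
            panic(err)
        }
        b, _ := os.ReadFile(path)
        fmt.Println(string(b))
    }

The rename is what makes this safe: on POSIX systems the replacement is atomic, so readers observe either the old cache or the new one, never a partial write.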
-type ShippedEvents struct { - // shipped event's timestamp - Timestamp time.Time `json:"ts"` - // map of event RVs (resource versions) already "shipped" (handed off) for this timestamp. - // this is to handle the case of a timestamp having multiple events, - // which happens quite frequently. - RvMap map[string]struct{} `json:"resourceVersion"` -} - -func newEventHandler(l log.Logger, globals integrations.Globals, c *Config) (integrations.Integration, error) { - var ( - config *rest.Config - err error - factory informers.SharedInformerFactory - id string - ) - - // Try using KubeconfigPath or inClusterConfig - config, err = clientcmd.BuildConfigFromFlags("", c.KubeconfigPath) - if err != nil { - level.Error(l).Log("msg", "Loading from KubeconfigPath or inClusterConfig failed", "err", err) - // Trying default home location - if home := homedir.HomeDir(); home != "" { - kubeconfigPath := filepath.Join(home, ".kube", "config") - config, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath) - if err != nil { - level.Error(l).Log("msg", "Could not load a kubeconfig", "err", err) - return nil, err - } - } else { - err = fmt.Errorf("could not load a kubeconfig") - return nil, err - } - } - - clientset, err := kubernetes.NewForConfig(config) - if err != nil { - return nil, err - } - - // get an informer - if c.Namespace == "" { - factory = informers.NewSharedInformerFactory(clientset, time.Duration(c.InformerResync)*time.Second) - } else { - factory = informers.NewSharedInformerFactoryWithOptions(clientset, time.Duration(c.InformerResync)*time.Second, informers.WithNamespace(c.Namespace)) - } - - eventInformer := factory.Core().V1().Events().Informer() - id, _ = c.Identifier(globals) - - eh := &EventHandler{ - LogsClient: globals.Logs, - LogsInstance: c.LogsInstance, - Log: l, - CachePath: c.CachePath, - EventInformer: eventInformer, - SendTimeout: time.Duration(c.SendTimeout) * time.Second, - instance: id, - extraLabels: c.ExtraLabels, - logFormat: c.LogFormat, - } - // set the resource handler fns - if err := eh.initInformer(eventInformer); err != nil { - return nil, err - } - eh.ticker = time.NewTicker(time.Duration(c.FlushInterval) * time.Second) - return eh, nil -} - -// Initialize informer by setting event handler fns -func (eh *EventHandler) initInformer(eventsInformer cache.SharedIndexInformer) error { - _, err := eventsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: eh.addEvent, - UpdateFunc: eh.updateEvent, - DeleteFunc: eh.deleteEvent, - }) - return err -} - -// Handles new event objects -func (eh *EventHandler) addEvent(obj interface{}) { - event, _ := obj.(*v1.Event) - - err := eh.handleEvent(event) - if err != nil { - level.Error(eh.Log).Log("msg", "Error handling event", "err", err, "event", event) - } -} - -// Handles event object updates. 
Note that this get triggered on informer resyncs and also -// events occurring more than once (in which case .count is incremented) -func (eh *EventHandler) updateEvent(objOld interface{}, objNew interface{}) { - eOld, _ := objOld.(*v1.Event) - eNew, _ := objNew.(*v1.Event) - - if eOld.GetResourceVersion() == eNew.GetResourceVersion() { - // ignore resync updates - level.Debug(eh.Log).Log("msg", "Event RV didn't change, ignoring", "eRV", eNew.ResourceVersion) - return - } - - err := eh.handleEvent(eNew) - if err != nil { - level.Error(eh.Log).Log("msg", "Error handling event", "err", err, "event", eNew) - } -} - -func (eh *EventHandler) handleEvent(event *v1.Event) error { - eventTs := getTimestamp(event) - - // if event is older than the one stored in cache on startup, we've shipped it - if eventTs.Before(eh.InitEvent.Timestamp) { - return nil - } - // if event is equal and is in map, we've shipped it - if eventTs.Equal(eh.InitEvent.Timestamp) { - if _, ok := eh.InitEvent.RvMap[event.ResourceVersion]; ok { - return nil - } - } - - labels, msg, err := eh.extractEvent(event) - if err != nil { - return err - } - - entry := newEntry(msg, eventTs, labels) - ok := eh.LogsClient.Instance(eh.LogsInstance).SendEntry(entry, eh.SendTimeout) - if !ok { - err = fmt.Errorf("msg=%s entry=%s", "error handing entry off to promtail", entry) - return err - } - - // update cache with new "last" event - err = eh.updateLastEvent(event, eventTs) - if err != nil { - return err - } - return nil -} - -// Called when event objects are removed from etcd, can safely ignore this -func (eh *EventHandler) deleteEvent(obj interface{}) { -} - -// extract data from event fields and create labels, etc. -// TODO: ship JSON blobs and allow users to configure using pipelines etc. -// instead of hardcoding labels here -func (eh *EventHandler) extractEvent(event *v1.Event) (model.LabelSet, string, error) { - var ( - msg strings.Builder - fields = make(map[string]any) - labels = make(model.LabelSet) - appender = appendTextMsg - ) - - if eh.logFormat == logFormatJson { - appender = appendJsonMsg - } - - obj := event.InvolvedObject - if obj.Name == "" { - return nil, "", fmt.Errorf("no involved object for event") - } - appender(&msg, fields, "name", obj.Name, "%s") - - labels[model.LabelName("namespace")] = model.LabelValue(obj.Namespace) - // TODO(hjet) omit "kubernetes" - labels[model.LabelName("job")] = model.LabelValue("integrations/kubernetes/eventhandler") - labels[model.LabelName("instance")] = model.LabelValue(eh.instance) - labels[model.LabelName("agent_hostname")] = model.LabelValue(eh.instance) - for _, lbl := range eh.extraLabels { - labels[model.LabelName(lbl.Name)] = model.LabelValue(lbl.Value) - } - - // we add these fields to the log line to reduce label bloat and cardinality - if obj.Kind != "" { - appender(&msg, fields, "kind", obj.Kind, "%s") - } - if event.Action != "" { - appender(&msg, fields, "action", event.Action, "%s") - } - if obj.APIVersion != "" { - appender(&msg, fields, "objectAPIversion", obj.APIVersion, "%s") - } - if obj.ResourceVersion != "" { - appender(&msg, fields, "objectRV", obj.ResourceVersion, "%s") - } - if event.ResourceVersion != "" { - appender(&msg, fields, "eventRV", event.ResourceVersion, "%s") - } - if event.ReportingInstance != "" { - appender(&msg, fields, "reportinginstance", event.ReportingInstance, "%s") - } - if event.ReportingController != "" { - appender(&msg, fields, "reportingcontroller", event.ReportingController, "%s") - } - if event.Source.Component != "" { - 
appender(&msg, fields, "sourcecomponent", event.Source.Component, "%s") - } - if event.Source.Host != "" { - appender(&msg, fields, "sourcehost", event.Source.Host, "%s") - } - if event.Reason != "" { - appender(&msg, fields, "reason", event.Reason, "%s") - } - if event.Type != "" { - appender(&msg, fields, "type", event.Type, "%s") - } - if event.Count != 0 { - appender(&msg, fields, "count", event.Count, "%d") - } - - appender(&msg, fields, "msg", event.Message, "%q") - - if eh.logFormat == logFormatJson { - bb, err := json.Marshal(fields) - if err != nil { - return nil, "", fmt.Errorf("failed to marshal Event to JSON: %w", err) - } - msg.WriteString(string(bb)) - } - - return labels, strings.TrimSpace(msg.String()), nil -} - -// Appends the "fields" map with an entry for the provided event field -// Signatures of "appendJsonMsg" and "appendTextMsg" must match -func appendJsonMsg(msg *strings.Builder, fields map[string]any, key string, value any, format string) { - fields[key] = value -} - -// Appends the message builder with the provided event field -// Signatures of "appendJsonMsg" and "appendTextMsg" must match -func appendTextMsg(msg *strings.Builder, fields map[string]any, key string, value any, format string) { - msg.WriteString(key) - msg.WriteByte('=') - msg.WriteString(fmt.Sprintf(format, value)) - msg.WriteByte(' ') -} - -func getTimestamp(event *v1.Event) time.Time { - if !event.LastTimestamp.IsZero() { - return event.LastTimestamp.Time - } - return event.EventTime.Time -} - -func newEntry(msg string, ts time.Time, labels model.LabelSet) api.Entry { - entry := logproto.Entry{Timestamp: ts, Line: msg} - return api.Entry{Labels: labels, Entry: entry} -} - -// maintain "last event" state -func (eh *EventHandler) updateLastEvent(e *v1.Event, eventTs time.Time) error { - eh.Lock() - defer eh.Unlock() - - eventRv := e.ResourceVersion - - if eh.LastEvent == nil { - // startup - eh.LastEvent = &ShippedEvents{Timestamp: eventTs, RvMap: make(map[string]struct{})} - eh.LastEvent.RvMap[eventRv] = struct{}{} - return nil - } - - // if timestamp is the same, add to map - if eh.LastEvent != nil && eventTs.Equal(eh.LastEvent.Timestamp) { - eh.LastEvent.RvMap[eventRv] = struct{}{} - return nil - } - - // if timestamp is different, create a new ShippedEvents struct - eh.LastEvent = &ShippedEvents{Timestamp: eventTs, RvMap: make(map[string]struct{})} - eh.LastEvent.RvMap[eventRv] = struct{}{} - return nil -} - -func (eh *EventHandler) writeOutLastEvent() error { - level.Info(eh.Log).Log("msg", "Flushing last event to disk") - - eh.Lock() - defer eh.Unlock() - - if eh.LastEvent == nil { - level.Info(eh.Log).Log("msg", "No last event to flush, returning") - return nil - } - - temp := eh.CachePath + "-new" - buf, err := json.Marshal(&eh.LastEvent) - if err != nil { - return err - } - - err = os.WriteFile(temp, buf, os.FileMode(cacheFileMode)) - if err != nil { - return err - } - - if err = os.Rename(temp, eh.CachePath); err != nil { - return err - } - level.Info(eh.Log).Log("msg", "Flushed last event to disk") - return nil -} - -// RunIntegration runs the eventhandler integration -func (eh *EventHandler) RunIntegration(ctx context.Context) error { - var wg sync.WaitGroup - - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - // Quick check to make sure logs instance exists - if i := eh.LogsClient.Instance(eh.LogsInstance); i == nil { - level.Error(eh.Log).Log("msg", "Logs instance not configured", "instance", eh.LogsInstance) - cancel() - } - - cacheDir := filepath.Dir(eh.CachePath) - if 
err := os.MkdirAll(cacheDir, 0755); err != nil { - level.Error(eh.Log).Log("msg", "Failed to create cache dir", "err", err) - cancel() - } - - // cache file to store events shipped (prevents double shipping on restart) - cacheFile, err := os.OpenFile(eh.CachePath, os.O_RDWR|os.O_CREATE, cacheFileMode) - if err != nil { - level.Error(eh.Log).Log("msg", "Failed to open or create cache file", "err", err) - cancel() - } - - // attempt to read last timestamp from cache file into a ShippedEvents struct - initEvent, err := readInitEvent(cacheFile, eh.Log) - if err != nil { - level.Error(eh.Log).Log("msg", "Failed to read last event from cache file", "err", err) - cancel() - } - eh.InitEvent = initEvent - - if err = cacheFile.Close(); err != nil { - level.Error(eh.Log).Log("msg", "Failed to close cache file", "err", err) - cancel() - } - - go func() { - level.Info(eh.Log).Log("msg", "Waiting for cache to sync (initial List of events)") - isSynced := cache.WaitForCacheSync(ctx.Done(), eh.EventInformer.HasSynced) - if !isSynced { - level.Error(eh.Log).Log("msg", "Failed to sync informer cache") - // maybe want to bail here - return - } - level.Info(eh.Log).Log("msg", "Informer cache synced") - }() - - // start the informer - // technically we should prob use the factory here, but since we - // only have one informer atm, this likely doesn't matter - go eh.EventInformer.Run(ctx.Done()) - - // wait for last event to flush before returning - wg.Add(1) - go func() { - defer wg.Done() - eh.runTicker(ctx.Done()) - }() - wg.Wait() - - return nil -} - -// write out last event every FlushInterval -func (eh *EventHandler) runTicker(stopCh <-chan struct{}) { - for { - select { - case <-stopCh: - if err := eh.writeOutLastEvent(); err != nil { - level.Error(eh.Log).Log("msg", "Failed to flush last event", "err", err) - } - return - case <-eh.ticker.C: - if err := eh.writeOutLastEvent(); err != nil { - level.Error(eh.Log).Log("msg", "Failed to flush last event", "err", err) - } - } - } -} - -func readInitEvent(file *os.File, logger log.Logger) (*ShippedEvents, error) { - var ( - initEvent = new(ShippedEvents) - ) - - stat, err := file.Stat() - if err != nil { - return nil, err - } - if stat.Size() == 0 { - level.Info(logger).Log("msg", "Cache file empty, setting zero-valued initEvent") - return initEvent, nil - } - - dec := json.NewDecoder(file) - err = dec.Decode(&initEvent) - if err != nil { - err = fmt.Errorf("could not read init event from cache: %s. 
Please delete the cache file", err) - return nil, err - } - level.Info(logger).Log("msg", "Loaded init event from cache file", "initEventTime", initEvent.Timestamp) - return initEvent, nil -} diff --git a/internal/static/integrations/v2/eventhandler/eventhandler_test.go b/internal/static/integrations/v2/eventhandler/eventhandler_test.go deleted file mode 100644 index 9d6e08a32a..0000000000 --- a/internal/static/integrations/v2/eventhandler/eventhandler_test.go +++ /dev/null @@ -1,54 +0,0 @@ -package eventhandler - -import ( - "os" - "testing" - "time" - - v1 "k8s.io/api/core/v1" - - "github.com/go-kit/log" - "github.com/stretchr/testify/require" -) - -func TestCacheLoad(t *testing.T) { - l := log.NewNopLogger() - testTime, _ := time.Parse(time.RFC3339, "2022-01-26T13:39:40-05:00") - expectedEvents := &ShippedEvents{ - Timestamp: testTime, - RvMap: map[string]struct{}{"58588": {}}, - } - cacheFile, err := os.OpenFile("testdata/eventhandler.cache", os.O_RDWR|os.O_CREATE, cacheFileMode) - require.NoError(t, err, "Failed to open test eventhandler cache file") - actualEvents, err := readInitEvent(cacheFile, l) - require.NoError(t, err, "Failed to parse last event from eventhandler cache file") - require.Equal(t, expectedEvents, actualEvents) -} - -func TestExtractEventJson(t *testing.T) { - var eh = new(EventHandler) - eh.logFormat = logFormatJson - var event = new(v1.Event) - event.InvolvedObject = v1.ObjectReference{ - Name: "test-object", - } - event.Message = "Event Message" - - _, msg, err := eh.extractEvent(event) - require.NoError(t, err, "Failed to extract test event") - require.Equal(t, "{\"msg\":\"Event Message\",\"name\":\"test-object\"}", msg) -} - -func TestExtractEventText(t *testing.T) { - var eh = new(EventHandler) - eh.logFormat = "logfmt" - var event = new(v1.Event) - event.InvolvedObject = v1.ObjectReference{ - Name: "test-object", - } - event.Message = "Event Message" - - _, msg, err := eh.extractEvent(event) - require.NoError(t, err, "Failed to extract test event") - require.Equal(t, "name=test-object msg=\"Event Message\"", msg) -} diff --git a/internal/static/integrations/v2/eventhandler/integration.go b/internal/static/integrations/v2/eventhandler/integration.go index caba0084e2..4453aeefce 100644 --- a/internal/static/integrations/v2/eventhandler/integration.go +++ b/internal/static/integrations/v2/eventhandler/integration.go @@ -1,6 +1,8 @@ package eventhandler import ( + "context" + "github.com/go-kit/log" "github.com/grafana/agent/internal/static/integrations/v2" "github.com/prometheus/prometheus/model/labels" @@ -13,7 +15,7 @@ var DefaultConfig = Config{ LogsInstance: "default", InformerResync: 120, FlushInterval: 10, - LogFormat: logFormatFmt, + LogFormat: "logfmt", } // Config configures the eventhandler integration @@ -71,9 +73,20 @@ func (c *Config) Identifier(globals integrations.Globals) (string, error) { // NewIntegration converts this config into an instance of an integration. func (c *Config) NewIntegration(l log.Logger, globals integrations.Globals) (integrations.Integration, error) { - return newEventHandler(l, globals, c) + // NOTE(rfratto): the eventhandler integration is never run, and all the + // logic has been moved to the loki.source.kubernetes_events component. + // + // This function is never called, but still exists for config conversion. 
+ return stubIntegration{}, nil } func init() { integrations.Register(&Config{}, integrations.TypeSingleton) } + +type stubIntegration struct{} + +func (stubIntegration) RunIntegration(ctx context.Context) error { + <-ctx.Done() + return nil +} diff --git a/internal/static/integrations/v2/eventhandler/testdata/eventhandler.cache b/internal/static/integrations/v2/eventhandler/testdata/eventhandler.cache deleted file mode 100644 index dc5814de08..0000000000 --- a/internal/static/integrations/v2/eventhandler/testdata/eventhandler.cache +++ /dev/null @@ -1 +0,0 @@ -{"ts":"2022-01-26T13:39:40-05:00","resourceVersion":{"58588":{}}} \ No newline at end of file diff --git a/internal/static/integrations/v2/integrations.go b/internal/static/integrations/v2/integrations.go index 1896280e3f..3c5ba8d3ab 100644 --- a/internal/static/integrations/v2/integrations.go +++ b/internal/static/integrations/v2/integrations.go @@ -26,10 +26,7 @@ import ( "github.com/go-kit/log" "github.com/grafana/agent/internal/static/integrations/v2/autoscrape" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/agent/internal/static/metrics" "github.com/grafana/agent/internal/static/server" - "github.com/grafana/agent/internal/static/traces" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -84,14 +81,6 @@ type Globals struct { // TODO(rfratto): flag to override identifier at agent level? AgentIdentifier string - // Some integrations may wish to interact with various subsystems for their - // implementation if the desired behavior is not supported natively by the - // integration manager. - - Metrics *metrics.Agent // Metrics subsystem - Logs *logs.Logs // Logs subsystem - Tracing *traces.Traces // Traces subsystem - // Options the integrations subsystem is using. SubsystemOpts SubsystemOptions // BaseURL to use to invoke methods against the embedded HTTP server. diff --git a/internal/static/integrations/v2/subsystem.go b/internal/static/integrations/v2/subsystem.go index ca128a1926..ce501b37c6 100644 --- a/internal/static/integrations/v2/subsystem.go +++ b/internal/static/integrations/v2/subsystem.go @@ -1,19 +1,8 @@ package integrations import ( - "context" - "encoding/json" - "fmt" - "net/http" - "sync" - "time" - - "github.com/go-kit/log" - "github.com/gorilla/mux" "github.com/grafana/agent/internal/static/integrations/v2/autoscrape" "github.com/grafana/agent/internal/static/metrics" - "github.com/prometheus/common/model" - http_sd "github.com/prometheus/prometheus/discovery/http" ) const ( @@ -76,172 +65,3 @@ func (o *SubsystemOptions) UnmarshalYAML(unmarshal func(interface{}) error) erro *o = DefaultSubsystemOptions return UnmarshalYAML(o, unmarshal) } - -// Subsystem runs the integrations subsystem, managing a set of integrations. -type Subsystem struct { - logger log.Logger - - mut sync.RWMutex - globals Globals - apiHandler http.Handler // generated from controller - autoscraper *autoscrape.Scraper - - ctrl *controller - stopController context.CancelFunc - controllerExited chan struct{} -} - -// NewSubsystem creates and starts a new integrations Subsystem. Every field in -// IntegrationOptions must be filled out. 
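[Editor's note: the stubIntegration added above satisfies the integration contract by parking on ctx.Done(), which keeps lifecycle management (start, cancel, wait) uniform even for integrations that do nothing. The pattern in isolation; stub and the surrounding harness are hypothetical names.]

    package main

    import (
        "context"
        "fmt"
        "time"
    )

    // stub blocks until its context is canceled, the same shape as the
    // stubIntegration kept above for config conversion.
    func stub(ctx context.Context) error {
        <-ctx.Done()
        return nil
    }

    func main() {
        ctx, cancel := context.WithCancel(context.Background())
        done := make(chan error, 1)
        go func() { done <- stub(ctx) }()

        time.Sleep(10 * time.Millisecond) // the stub sits idle until canceled
        cancel()
        fmt.Println("stub exited:", <-done) // stub exited: <nil>
    }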
-func NewSubsystem(l log.Logger, globals Globals) (*Subsystem, error) { - autoscraper := autoscrape.NewScraper(l, globals.Metrics.InstanceManager(), globals.DialContextFunc) - - l = log.With(l, "component", "integrations") - - ctrl, err := newController(l, controllerConfig(globals.SubsystemOpts.Configs), globals) - if err != nil { - autoscraper.Stop() - return nil, err - } - - ctx, cancel := context.WithCancel(context.Background()) - - ctrlExited := make(chan struct{}) - go func() { - ctrl.run(ctx) - close(ctrlExited) - }() - - s := &Subsystem{ - logger: l, - - globals: globals, - autoscraper: autoscraper, - - ctrl: ctrl, - stopController: cancel, - controllerExited: ctrlExited, - } - if err := s.ApplyConfig(globals); err != nil { - cancel() - autoscraper.Stop() - return nil, err - } - return s, nil -} - -// ApplyConfig updates the configuration of the integrations subsystem. -func (s *Subsystem) ApplyConfig(globals Globals) error { - const prefix = "/integrations/" - - s.mut.Lock() - defer s.mut.Unlock() - - if err := s.ctrl.UpdateController(controllerConfig(globals.SubsystemOpts.Configs), globals); err != nil { - return fmt.Errorf("error applying integrations: %w", err) - } - - var firstErr error - saveFirstErr := func(err error) { - if firstErr == nil { - firstErr = err - } - } - - // Set up HTTP wiring - { - handler, err := s.ctrl.Handler(prefix) - if err != nil { - saveFirstErr(fmt.Errorf("HTTP handler update failed: %w", err)) - } - s.apiHandler = handler - } - - // Set up self-scraping - { - httpSDConfig := http_sd.DefaultSDConfig - httpSDConfig.RefreshInterval = model.Duration(time.Second * 5) // TODO(rfratto): make configurable? - - apiURL := globals.CloneAgentBaseURL() - apiURL.Path = IntegrationsSDEndpoint - httpSDConfig.URL = apiURL.String() - - scrapeConfigs := s.ctrl.ScrapeConfigs(prefix, &httpSDConfig) - if err := s.autoscraper.ApplyConfig(scrapeConfigs); err != nil { - saveFirstErr(fmt.Errorf("configuring autoscraper failed: %w", err)) - } - } - - s.globals = globals - return firstErr -} - -// WireAPI hooks up integration endpoints to r. -func (s *Subsystem) WireAPI(r *mux.Router) { - const prefix = "/integrations" - r.PathPrefix(prefix).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { - s.mut.RLock() - handler := s.apiHandler - s.mut.RUnlock() - - if handler == nil { - rw.WriteHeader(http.StatusServiceUnavailable) - fmt.Fprintf(rw, "Integrations HTTP endpoints not yet available") - return - } - handler.ServeHTTP(rw, r) - }) - - r.HandleFunc(IntegrationsSDEndpoint, func(rw http.ResponseWriter, r *http.Request) { - targetOptions, err := TargetOptionsFromParams(r.URL.Query()) - if err != nil { - http.Error(rw, fmt.Sprintf("invalid query parameters: %s", err), http.StatusBadRequest) - return - } - - rw.Header().Set("Content-Type", "application/json") - rw.WriteHeader(http.StatusOK) - - tgs := s.ctrl.Targets(Endpoint{ - Host: r.Host, - Prefix: prefix, - }, targetOptions) - - // Normalize targets. We may have targets in the group with non-address - // labels. These need to be retained, so we'll just split everything up - // into multiple groups. - // - // TODO(rfratto): optimize to remove redundant groups - finalTgs := []*targetGroup{} - for _, group := range tgs { - for _, target := range group.Targets { - // Create the final labels for the group. This will be everything from - // the group and the target (except for model.AddressLabel). Labels - // from target take precedence labels from over group. 
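[Editor's note: the normalization described above, and implemented just below, splits every group into single-target groups so that per-target labels survive HTTP SD, which can only represent group-level labels. A standalone sketch; the group type and normalize are hypothetical, while LabelSet.Merge and model.AddressLabel are the real Prometheus common/model APIs used by the deleted handler.]

    package main

    import (
        "fmt"

        "github.com/prometheus/common/model"
    )

    type group struct {
        Targets []model.LabelSet
        Labels  model.LabelSet
    }

    // normalize splits a group into one group per target so non-address
    // target labels survive the HTTP SD round trip. Label values from the
    // target win over the group's on conflict, because LabelSet.Merge
    // prefers entries from its argument.
    func normalize(groups []group) []group {
        var out []group
        for _, g := range groups {
            for _, target := range g.Targets {
                labels := g.Labels.Merge(target)
                delete(labels, model.AddressLabel)
                out = append(out, group{
                    Targets: []model.LabelSet{{model.AddressLabel: target[model.AddressLabel]}},
                    Labels:  labels,
                })
            }
        }
        return out
    }

    func main() {
        in := []group{{
            Targets: []model.LabelSet{
                {model.AddressLabel: "127.0.0.1:9100", "tier": "db"},
                {model.AddressLabel: "127.0.0.1:9200"},
            },
            Labels: model.LabelSet{"env": "prod"},
        }}
        for _, g := range normalize(in) {
            fmt.Println(g.Targets, g.Labels)
        }
    }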
- groupLabels := group.Labels.Merge(target) - delete(groupLabels, model.AddressLabel) - - finalTgs = append(finalTgs, &targetGroup{ - Targets: []model.LabelSet{{model.AddressLabel: target[model.AddressLabel]}}, - Labels: groupLabels, - }) - } - } - - enc := json.NewEncoder(rw) - _ = enc.Encode(finalTgs) - }) - - r.HandleFunc(IntegrationsAutoscrapeTargetsEndpoint, func(rw http.ResponseWriter, r *http.Request) { - allTargets := s.autoscraper.TargetsActive() - metrics.ListTargetsHandler(allTargets).ServeHTTP(rw, r) - }) -} - -// Stop stops the manager and all running integrations. Blocks until all -// running integrations exit. -func (s *Subsystem) Stop() { - s.autoscraper.Stop() - s.stopController() - <-s.controllerExited -} diff --git a/internal/static/integrations/v2/targetgroup.go b/internal/static/integrations/v2/targetgroup.go deleted file mode 100644 index 4400105c4f..0000000000 --- a/internal/static/integrations/v2/targetgroup.go +++ /dev/null @@ -1,28 +0,0 @@ -package integrations - -import ( - "encoding/json" - - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/discovery/targetgroup" -) - -// targetGroup implements json.Marshaler for targetgroup.Group. This is -// required due to an issue with Prometheus: HTTP SD expects to be unmarshaled -// as JSON, but the form it expects to unmarshal the target groups in is not the form -// it marshals out to JSON as. -type targetGroup targetgroup.Group - -func (tg *targetGroup) MarshalJSON() ([]byte, error) { - g := &struct { - Targets []string `json:"targets"` - Labels model.LabelSet `json:"labels,omitempty"` - }{ - Targets: make([]string, 0, len(tg.Targets)), - Labels: tg.Labels, - } - for _, t := range tg.Targets { - g.Targets = append(g.Targets, string(t[model.AddressLabel])) - } - return json.Marshal(g) -} diff --git a/internal/static/integrations/v2/workers.go b/internal/static/integrations/v2/workers.go deleted file mode 100644 index 4315710c49..0000000000 --- a/internal/static/integrations/v2/workers.go +++ /dev/null @@ -1,122 +0,0 @@ -package integrations - -import ( - "context" - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" -) - -type workerPool struct { - log log.Logger - parentCtx context.Context - - mut sync.Mutex - workers map[*controlledIntegration]worker - - runningWorkers sync.WaitGroup -} - -type worker struct { - ci *controlledIntegration - stop context.CancelFunc - exited chan struct{} -} - -func newWorkerPool(ctx context.Context, l log.Logger) *workerPool { - return &workerPool{ - log: l, - parentCtx: ctx, - - workers: make(map[*controlledIntegration]worker), - } -} - -func (p *workerPool) Reload(newIntegrations []*controlledIntegration) { - p.mut.Lock() - defer p.mut.Unlock() - - level.Debug(p.log).Log("msg", "updating running integrations", "prev_count", len(p.workers), "new_count", len(newIntegrations)) - - // Shut down workers whose integrations have gone away. - var stopped []worker - for ci, w := range p.workers { - var found bool - for _, current := range newIntegrations { - if ci == current { - found = true - break - } - } - if !found { - w.stop() - stopped = append(stopped, w) - } - } - for _, w := range stopped { - // Wait for stopped integrations to fully exit. We do this in a separate - // loop so context cancellations can be handled simultaneously, allowing - // the wait to complete faster. - <-w.exited - } - - // Spawn new workers for integrations that don't have them.
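[Editor's note on targetgroup.go above: Prometheus marshals targetgroup.Group with targets as full label sets, while HTTP SD expects plain address strings, hence the wrapper type. A runnable demonstration of the wire shape it produces; tg is a hypothetical name, but the Targets and Labels fields come from the real targetgroup.Group.]

    package main

    import (
        "encoding/json"
        "fmt"

        "github.com/prometheus/common/model"
        "github.com/prometheus/prometheus/discovery/targetgroup"
    )

    // tg wraps targetgroup.Group to emit the {"targets": [...], "labels": {...}}
    // shape HTTP SD consumes, as the deleted targetGroup type did.
    type tg targetgroup.Group

    func (t *tg) MarshalJSON() ([]byte, error) {
        g := struct {
            Targets []string       `json:"targets"`
            Labels  model.LabelSet `json:"labels,omitempty"`
        }{
            Targets: make([]string, 0, len(t.Targets)),
            Labels:  t.Labels,
        }
        for _, target := range t.Targets {
            g.Targets = append(g.Targets, string(target[model.AddressLabel]))
        }
        return json.Marshal(g)
    }

    func main() {
        g := &tg{
            Targets: []model.LabelSet{{model.AddressLabel: "127.0.0.1:9100"}},
            Labels:  model.LabelSet{"job": "integrations/agent"},
        }
        out, _ := json.Marshal(g)
        // {"targets":["127.0.0.1:9100"],"labels":{"job":"integrations/agent"}}
        fmt.Println(string(out))
    }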
- for _, current := range newIntegrations { - if _, workerExists := p.workers[current]; workerExists { - continue - } - // This integration doesn't have an existing worker; schedule a new one. - p.scheduleWorker(current) - } -} - -func (p *workerPool) Close() { - p.mut.Lock() - defer p.mut.Unlock() - - level.Debug(p.log).Log("msg", "stopping all integrations") - - defer p.runningWorkers.Wait() - for _, w := range p.workers { - w.stop() - } -} - -func (p *workerPool) scheduleWorker(ci *controlledIntegration) { - p.runningWorkers.Add(1) - - ctx, cancel := context.WithCancel(p.parentCtx) - - w := worker{ - ci: ci, - stop: cancel, - exited: make(chan struct{}), - } - p.workers[ci] = w - - go func() { - ci.running.Store(true) - - // When the integration stops running, we want to free any of our - // resources that will notify watchers waiting for the worker to stop. - // - // Afterwards, we'll block until we remove ourselves from the map; having - // a worker remove itself on shutdown allows exited integrations to - // re-start when the config is reloaded. - defer func() { - ci.running.Store(false) - close(w.exited) - p.runningWorkers.Done() - - p.mut.Lock() - defer p.mut.Unlock() - delete(p.workers, ci) - }() - - err := ci.i.RunIntegration(ctx) - if err != nil { - level.Error(p.log).Log("msg", "integration exited with error", "id", ci.id, "err", err) - } - }() -} diff --git a/internal/static/logs/http.go b/internal/static/logs/http.go deleted file mode 100644 index b3e7a00d88..0000000000 --- a/internal/static/logs/http.go +++ /dev/null @@ -1,84 +0,0 @@ -package logs - -import ( - "net/http" - "sort" - - "github.com/go-kit/log/level" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/static/metrics/cluster/configapi" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" - "github.com/prometheus/common/model" -) - -// WireAPI adds API routes to the provided mux router. -func (l *Logs) WireAPI(r *mux.Router) { - r.HandleFunc("/agent/api/v1/logs/instances", l.ListInstancesHandler).Methods("GET") - r.HandleFunc("/agent/api/v1/logs/targets", l.ListTargetsHandler).Methods("GET") -} - -// ListInstancesHandler writes the set of currently running instances to the http.ResponseWriter. -func (l *Logs) ListInstancesHandler(w http.ResponseWriter, _ *http.Request) { - instances := l.instances - instanceNames := make([]string, 0, len(instances)) - for instance := range instances { - instanceNames = append(instanceNames, instance) - } - sort.Strings(instanceNames) - - err := configapi.WriteResponse(w, http.StatusOK, instanceNames) - if err != nil { - level.Error(l.l).Log("msg", "failed to write response", "err", err) - } -} - -// ListTargetsHandler retrieves the full set of targets across all instances and shows -// information on them. 
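[Editor's note: workers.go above follows a reconcile pattern: on every Reload, stop workers whose integrations disappeared and start workers for new ones, each with its own context.CancelFunc, with a WaitGroup so Close can block until everything exits. A reduced sketch keyed by string; pool and its methods are hypothetical, and the real code also waits for stopped workers during Reload, which is omitted here for brevity.]

    package main

    import (
        "context"
        "fmt"
        "sync"
        "time"
    )

    // pool keeps one goroutine per key, each with its own cancel func.
    type pool struct {
        mut     sync.Mutex
        cancels map[string]context.CancelFunc
        wg      sync.WaitGroup
    }

    func (p *pool) Reload(keys []string) {
        p.mut.Lock()
        defer p.mut.Unlock()

        want := map[string]bool{}
        for _, k := range keys {
            want[k] = true
        }
        // Stop workers whose keys went away.
        for k, cancel := range p.cancels {
            if !want[k] {
                cancel()
                delete(p.cancels, k)
            }
        }
        // Start workers for new keys; existing ones are left running.
        for _, k := range keys {
            if _, ok := p.cancels[k]; ok {
                continue
            }
            ctx, cancel := context.WithCancel(context.Background())
            p.cancels[k] = cancel
            p.wg.Add(1)
            go func(k string) {
                defer p.wg.Done()
                <-ctx.Done()
                fmt.Println("stopped", k)
            }(k)
        }
    }

    func (p *pool) Close() {
        p.mut.Lock()
        for _, cancel := range p.cancels {
            cancel()
        }
        p.mut.Unlock()
        p.wg.Wait()
    }

    func main() {
        p := &pool{cancels: map[string]context.CancelFunc{}}
        p.Reload([]string{"a", "b"})
        p.Reload([]string{"b"}) // stops a
        time.Sleep(10 * time.Millisecond)
        p.Close() // stops b
    }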
-func (l *Logs) ListTargetsHandler(w http.ResponseWriter, r *http.Request) { - instances := l.instances - allTargets := make(map[string]TargetSet, len(instances)) - for instName, inst := range instances { - allTargets[instName] = inst.promtail.ActiveTargets() - } - listTargetsHandler(allTargets).ServeHTTP(w, r) -} - -func listTargetsHandler(targets map[string]TargetSet) http.Handler { - return http.HandlerFunc(func(rw http.ResponseWriter, _ *http.Request) { - resp := ListTargetsResponse{} - for instance, tset := range targets { - for key, targets := range tset { - for _, tgt := range targets { - resp = append(resp, TargetInfo{ - InstanceName: instance, - TargetGroup: key, - Type: tgt.Type(), - DiscoveredLabels: tgt.DiscoveredLabels(), - Labels: tgt.Labels(), - Ready: tgt.Ready(), - Details: tgt.Details(), - }) - } - } - } - _ = configapi.WriteResponse(rw, http.StatusOK, resp) - }) -} - -// TargetSet is a set of targets for an individual scraper. -type TargetSet map[string][]target.Target - -// ListTargetsResponse is returned by the ListTargetsHandler. -type ListTargetsResponse []TargetInfo - -// TargetInfo describes a specific target. -type TargetInfo struct { - InstanceName string `json:"instance"` - TargetGroup string `json:"target_group"` - - Type target.TargetType `json:"type"` - Labels model.LabelSet `json:"labels"` - DiscoveredLabels model.LabelSet `json:"discovered_labels"` - Ready bool `json:"ready"` - Details interface{} `json:"details"` -} diff --git a/internal/static/logs/http_test.go b/internal/static/logs/http_test.go deleted file mode 100644 index e37110f205..0000000000 --- a/internal/static/logs/http_test.go +++ /dev/null @@ -1,177 +0,0 @@ -package logs - -import ( - "net/http" - "net/http/httptest" - "strings" - "testing" - - "github.com/grafana/agent/internal/util" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/common/model" - "github.com/stretchr/testify/require" - "gopkg.in/yaml.v2" -) - -func TestAgent_ListInstancesHandler(t *testing.T) { - cfgText := util.Untab(` -configs: -- name: instance-a - positions: - filename: /tmp/positions.yaml - clients: - - url: http://127.0.0.1:80/loki/api/v1/push - `) - - var cfg Config - - logger := util.TestLogger(t) - l, err := New(prometheus.NewRegistry(), &cfg, logger, false) - require.NoError(t, err) - defer l.Stop() - - r := httptest.NewRequest("GET", "/agent/api/v1/logs/instances", nil) - - t.Run("no instances", func(t *testing.T) { - rr := httptest.NewRecorder() - l.ListInstancesHandler(rr, r) - expect := `{"status":"success","data":[]}` - require.Equal(t, expect, rr.Body.String()) - }) - - dec := yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&cfg)) - t.Run("non-empty", func(t *testing.T) { - require.NoError(t, l.ApplyConfig(&cfg, false)) - - expect := `{"status":"success","data":["instance-a"]}` - - util.Eventually(t, func(t require.TestingT) { - rr := httptest.NewRecorder() - l.ListInstancesHandler(rr, r) - require.Equal(t, expect, rr.Body.String()) - }) - }) -} - -func TestAgent_ListTargetsHandler(t *testing.T) { - cfgText := util.Untab(` -configs: -- name: instance-a - positions: - filename: /tmp/positions.yaml - clients: - - url: http://127.0.0.1:80/loki/api/v1/push - `) - - var cfg Config - dec := yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&cfg)) - - logger := util.TestLogger(t) - l, err := New(prometheus.NewRegistry(), &cfg, 
logger, false) - require.NoError(t, err) - defer l.Stop() - - r := httptest.NewRequest("GET", "/agent/api/v1/logs/targets", nil) - - t.Run("scrape manager not ready", func(t *testing.T) { - rr := httptest.NewRecorder() - l.ListTargetsHandler(rr, r) - expect := `{"status": "success", "data": []}` - require.JSONEq(t, expect, rr.Body.String()) - require.Equal(t, http.StatusOK, rr.Result().StatusCode) - }) - - t.Run("scrape manager targets", func(t *testing.T) { - rr := httptest.NewRecorder() - targets := map[string]TargetSet{ - "instance-a": mockActiveTargets(), - } - listTargetsHandler(targets).ServeHTTP(rr, r) - expect := `{ - "status": "success", - "data": [ - { - "instance": "instance-a", - "target_group": "varlogs", - "type": "File", - "labels": { - "job": "varlogs" - }, - "discovered_labels": { - "__address__": "localhost", - "__path__": "/var/log/*log", - "job": "varlogs" - }, - "ready": true, - "details": { - "/var/log/alternatives.log": 13386, - "/var/log/apport.log": 0, - "/var/log/auth.log": 37009, - "/var/log/bootstrap.log": 107347, - "/var/log/dpkg.log": 374420, - "/var/log/faillog": 0, - "/var/log/fontconfig.log": 11629, - "/var/log/gpu-manager.log": 1541, - "/var/log/kern.log": 782582, - "/var/log/lastlog": 0, - "/var/log/syslog": 788450 - } - } - ] - }` - require.JSONEq(t, expect, rr.Body.String()) - require.Equal(t, http.StatusOK, rr.Result().StatusCode) - }) -} - -func mockActiveTargets() map[string][]target.Target { - return map[string][]target.Target{ - "varlogs": {&mockTarget{}}, - } -} - -type mockTarget struct { -} - -func (mt *mockTarget) Type() target.TargetType { - return target.TargetType("File") -} - -func (mt *mockTarget) DiscoveredLabels() model.LabelSet { - return map[model.LabelName]model.LabelValue{ - "__address__": "localhost", - "__path__": "/var/log/*log", - "job": "varlogs", - } -} - -func (mt *mockTarget) Labels() model.LabelSet { - return map[model.LabelName]model.LabelValue{ - "job": "varlogs", - } -} - -func (mt *mockTarget) Ready() bool { - return true -} - -func (mt *mockTarget) Details() interface{} { - return map[string]int{ - "/var/log/alternatives.log": 13386, - "/var/log/apport.log": 0, - "/var/log/auth.log": 37009, - "/var/log/bootstrap.log": 107347, - "/var/log/dpkg.log": 374420, - "/var/log/faillog": 0, - "/var/log/fontconfig.log": 11629, - "/var/log/gpu-manager.log": 1541, - "/var/log/kern.log": 782582, - "/var/log/lastlog": 0, - "/var/log/syslog": 788450, - } -} diff --git a/internal/static/logs/logs.go b/internal/static/logs/logs.go index 2d6c478fe5..8dd2035341 100644 --- a/internal/static/logs/logs.go +++ b/internal/static/logs/logs.go @@ -2,142 +2,20 @@ package logs import ( - "fmt" - "os" - "path/filepath" - "sync" - "time" _ "time/tzdata" // embed timezone data - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/agentseed" "github.com/grafana/agent/internal/useragent" - "github.com/grafana/agent/internal/util" - "github.com/grafana/loki/clients/pkg/promtail" - "github.com/grafana/loki/clients/pkg/promtail/api" "github.com/grafana/loki/clients/pkg/promtail/client" "github.com/grafana/loki/clients/pkg/promtail/config" "github.com/grafana/loki/clients/pkg/promtail/server" - "github.com/grafana/loki/clients/pkg/promtail/targets/file" "github.com/grafana/loki/clients/pkg/promtail/wal" "github.com/grafana/loki/pkg/tracing" - "github.com/prometheus/client_golang/prometheus" ) func init() { client.UserAgent = useragent.Get() } -// Logs is a Logs log collection. 
It uses multiple distinct sets of Logs -// Promtail agents to collect logs and send them to a Logs server. -type Logs struct { - mut sync.Mutex - - reg prometheus.Registerer - l log.Logger - instances map[string]*Instance -} - -// New creates and starts Loki log collection. -func New(reg prometheus.Registerer, c *Config, l log.Logger, dryRun bool) (*Logs, error) { - logs := &Logs{ - instances: make(map[string]*Instance), - reg: reg, - l: log.With(l, "component", "logs"), - } - if err := logs.ApplyConfig(c, dryRun); err != nil { - return nil, err - } - return logs, nil -} - -// ApplyConfig updates Logs with a new Config. -func (l *Logs) ApplyConfig(c *Config, dryRun bool) error { - l.mut.Lock() - defer l.mut.Unlock() - - if c == nil { - c = &Config{} - } - - newInstances := make(map[string]*Instance, len(c.Configs)) - - for _, ic := range c.Configs { - // If an old instance existed, update it and move it to the new map. - if old, ok := l.instances[ic.Name]; ok { - err := old.ApplyConfig(ic, c.Global, dryRun) - if err != nil { - return err - } - - newInstances[ic.Name] = old - continue - } - - inst, err := NewInstance(l.reg, ic, c.Global, l.l, dryRun) - if err != nil { - return fmt.Errorf("unable to apply config for %s: %w", ic.Name, err) - } - newInstances[ic.Name] = inst - } - - // Any promtail in l.instances that isn't in newInstances has been removed - // from the config. Stop them before replacing the map. - for key, i := range l.instances { - if _, exist := newInstances[key]; exist { - continue - } - i.Stop() - } - l.instances = newInstances - - return nil -} - -// Stop stops the log collector. -func (l *Logs) Stop() { - l.mut.Lock() - defer l.mut.Unlock() - - for _, i := range l.instances { - i.Stop() - } -} - -// Instance is used to retrieve a named Logs instance -func (l *Logs) Instance(name string) *Instance { - l.mut.Lock() - defer l.mut.Unlock() - - return l.instances[name] -} - -// Instance is an individual Logs instance. -type Instance struct { - mut sync.Mutex - - cfg *InstanceConfig - log log.Logger - reg *util.Unregisterer - - promtail *promtail.Promtail -} - -// NewInstance creates and starts a Logs instance. -func NewInstance(reg prometheus.Registerer, c *InstanceConfig, g GlobalConfig, l log.Logger, dryRun bool) (*Instance, error) { - instReg := prometheus.WrapRegistererWith(prometheus.Labels{"logs_config": c.Name}, reg) - - inst := Instance{ - reg: util.WrapWithUnregisterer(instReg), - log: log.With(l, "logs_config", c.Name), - } - if err := inst.ApplyConfig(c, g, dryRun); err != nil { - return nil, err - } - return &inst, nil -} - // DefaultConfig returns a default config for a Logs instance. func DefaultConfig() config.Config { return config.Config{ @@ -146,103 +24,3 @@ func DefaultConfig() config.Config { WAL: wal.Config{Enabled: false}, } } - -// ApplyConfig will apply a new InstanceConfig. If the config hasn't changed, -// then nothing will happen, otherwise the old Promtail will be stopped and -// then replaced with a new one. -func (i *Instance) ApplyConfig(c *InstanceConfig, g GlobalConfig, dryRun bool) error { - i.mut.Lock() - defer i.mut.Unlock() - - // No-op if the configs haven't changed. - if util.CompareYAML(c, i.cfg) { - level.Debug(i.log).Log("msg", "instance config hasn't changed, not recreating Promtail") - return nil - } - i.cfg = c - - positionsDir := filepath.Dir(c.PositionsConfig.PositionsFile) - err := os.MkdirAll(positionsDir, 0775) - if err != nil { - level.Warn(i.log).Log("msg", "failed to create the positions directory. 
logs may be unable to save their position", "path", positionsDir, "err", err) - } - - if i.promtail != nil { - i.promtail.Shutdown() - i.promtail = nil - } - - // Unregister all existing metrics before trying to create a new instance. - if !i.reg.UnregisterAll() { - // If UnregisterAll fails, we need to abort, otherwise the new promtail - // would try to re-register an existing metric and might panic. - return fmt.Errorf("failed to unregister all metrics from previous promtail. THIS IS A BUG") - } - - if len(c.ClientConfigs) == 0 { - level.Debug(i.log).Log("msg", "skipping creation of a promtail because no client_configs are present") - return nil - } - - uid := agentseed.Get().UID - for i := range c.ClientConfigs { - // ClientConfigs is a slice of struct, so we set values with the index - if c.ClientConfigs[i].Headers == nil { - c.ClientConfigs[i].Headers = map[string]string{} - } - c.ClientConfigs[i].Headers[agentseed.HeaderName] = uid - } - - clientMetrics := client.NewMetrics(i.reg) - cfg := DefaultConfig() - cfg.Global = config.GlobalConfig{ - FileWatch: file.WatchConfig{ - MinPollFrequency: g.FileWatch.MinPollFrequency, - MaxPollFrequency: g.FileWatch.MaxPollFrequency, - }, - } - cfg.ClientConfigs = c.ClientConfigs - cfg.PositionsConfig = c.PositionsConfig - cfg.ScrapeConfig = c.ScrapeConfig - cfg.TargetConfig = c.TargetConfig - cfg.LimitsConfig = c.LimitsConfig - - p, err := promtail.New(cfg, nil, clientMetrics, dryRun, promtail.WithLogger(i.log), promtail.WithRegisterer(i.reg)) - if err != nil { - return fmt.Errorf("unable to create logs instance: %w", err) - } - - i.promtail = p - return nil -} - -// SendEntry passes an entry to the internal promtail client and returns true if successfully sent. It is -// best effort and not guaranteed to succeed. -func (i *Instance) SendEntry(entry api.Entry, dur time.Duration) bool { - i.mut.Lock() - defer i.mut.Unlock() - - // promtail is nil it has been stopped - if i.promtail != nil { - // send non blocking so we don't block the mutex. this is best effort - select { - case i.promtail.Client().Chan() <- entry: - return true - case <-time.After(dur): - } - } - - return false -} - -// Stop stops the Promtail instance. 
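[Editor's note: SendEntry above is deliberately lossy: it attempts a channel send and abandons the entry after a timeout, so a slow or stopped promtail client cannot block the caller that holds the mutex. The select idiom in isolation; trySend is a hypothetical generic helper, not the original API.]

    package main

    import (
        "fmt"
        "time"
    )

    // trySend attempts a channel send but gives up after d, the same
    // best-effort handoff the deleted Instance.SendEntry used for log
    // entries.
    func trySend[T any](ch chan<- T, v T, d time.Duration) bool {
        select {
        case ch <- v:
            return true
        case <-time.After(d):
            return false
        }
    }

    func main() {
        ch := make(chan string, 1)
        fmt.Println(trySend(ch, "hello", 50*time.Millisecond)) // true: buffer has room
        fmt.Println(trySend(ch, "world", 50*time.Millisecond)) // false: buffer full, times out
    }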
-func (i *Instance) Stop() { - i.mut.Lock() - defer i.mut.Unlock() - - if i.promtail != nil { - i.promtail.Shutdown() - i.promtail = nil - } - i.reg.UnregisterAll() -} diff --git a/internal/static/logs/logs_test.go b/internal/static/logs/logs_test.go deleted file mode 100644 index 255c99b55f..0000000000 --- a/internal/static/logs/logs_test.go +++ /dev/null @@ -1,206 +0,0 @@ -//go:build !race - -package logs - -import ( - "fmt" - "net" - "net/http" - "os" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/grafana/loki/pkg/loghttp/push" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/util" - "github.com/grafana/loki/pkg/logproto" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" - "gopkg.in/yaml.v2" -) - -func TestLogs_NilConfig(t *testing.T) { - l, err := New(prometheus.NewRegistry(), nil, util.TestLogger(t), false) - require.NoError(t, err) - require.NoError(t, l.ApplyConfig(nil, false)) - - defer l.Stop() -} - -func TestLogs(t *testing.T) { - // - // Create a temporary file to tail - // - positionsDir := t.TempDir() - - tmpFile, err := os.CreateTemp(os.TempDir(), "*.log") - require.NoError(t, err) - t.Cleanup(func() { - _ = os.RemoveAll(tmpFile.Name()) - }) - - // - // Listen for push requests and pass them through to a channel - // - pushes := make(chan *logproto.PushRequest) - - lis, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - t.Cleanup(func() { - require.NoError(t, lis.Close()) - }) - go func() { - _ = http.Serve(lis, http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { - req, err := push.ParseRequest(log.NewNopLogger(), "user_id", r, nil, nil, push.ParseLokiRequest) - require.NoError(t, err) - - pushes <- req - _, _ = rw.Write(nil) - })) - }() - - // - // Launch Loki so it starts tailing the file and writes to our server. - // - cfgText := util.Untab(fmt.Sprintf(` -positions_directory: %s -configs: -- name: default - clients: - - url: http://%s/loki/api/v1/push - batchwait: 50ms - batchsize: 1 - scrape_configs: - - job_name: system - static_configs: - - targets: [localhost] - labels: - job: test - __path__: %s - `, positionsDir, lis.Addr().String(), tmpFile.Name())) - - var cfg Config - dec := yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&cfg)) - require.NoError(t, cfg.ApplyDefaults()) - logger := log.NewSyncLogger(log.NewNopLogger()) - l, err := New(prometheus.NewRegistry(), &cfg, logger, false) - require.NoError(t, err) - defer l.Stop() - - // - // Write a log line and wait for it to come through. - // - fmt.Fprintf(tmpFile, "Hello, world!\n") - select { - case <-time.After(time.Second * 30): - require.FailNow(t, "timed out waiting for data to be pushed") - case req := <-pushes: - require.Equal(t, "Hello, world!", req.Streams[0].Entries[0].Line) - } - - // - // Apply a new config and write a new line. 
- // - cfgText = util.Untab(fmt.Sprintf(` -positions_directory: %s -configs: -- name: default - clients: - - url: http://%s/loki/api/v1/push - batchwait: 50ms - batchsize: 5 - scrape_configs: - - job_name: system - static_configs: - - targets: [localhost] - labels: - job: test-2 - __path__: %s - `, positionsDir, lis.Addr().String(), tmpFile.Name())) - - var newCfg Config - dec = yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&newCfg)) - require.NoError(t, newCfg.ApplyDefaults()) - require.NoError(t, l.ApplyConfig(&newCfg, false)) - - fmt.Fprintf(tmpFile, "Hello again!\n") - select { - case <-time.After(time.Second * 30): - require.FailNow(t, "timed out waiting for data to be pushed") - case req := <-pushes: - require.Equal(t, "Hello again!", req.Streams[0].Entries[0].Line) - } - - t.Run("update to nil", func(t *testing.T) { - // Applying a nil config should remove all instances. - err := l.ApplyConfig(nil, false) - require.NoError(t, err) - require.Len(t, l.instances, 0) - }) - - t.Run("re-apply previous config", func(t *testing.T) { - // Applying a nil config should remove all instances. - l.ApplyConfig(nil, false) - - // Re-Apply the previous config and write a new line. - var newCfg Config - dec = yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&newCfg)) - require.NoError(t, newCfg.ApplyDefaults()) - require.NoError(t, l.ApplyConfig(&newCfg, false)) - - fmt.Fprintf(tmpFile, "Hello again!\n") - select { - case <-time.After(time.Second * 30): - require.FailNow(t, "timed out waiting for data to be pushed") - case req := <-pushes: - require.Equal(t, "Hello again!", req.Streams[0].Entries[0].Line) - } - }) -} - -func TestLogs_PositionsDirectory(t *testing.T) { - // - // Create a temporary file to tail - // - positionsDir := t.TempDir() - - // - // Launch Loki so it starts tailing the file and writes to our server. 
- // - cfgText := util.Untab(fmt.Sprintf(` -positions_directory: %[1]s/positions -configs: -- name: instance-a - clients: - - url: http://127.0.0.1:80/loki/api/v1/push -- name: instance-b - positions: - filename: %[1]s/other-positions/instance.yml - clients: - - url: http://127.0.0.1:80/loki/api/v1/push - `, positionsDir)) - - var cfg Config - dec := yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&cfg)) - require.NoError(t, cfg.ApplyDefaults()) - logger := util.TestLogger(t) - l, err := New(prometheus.NewRegistry(), &cfg, logger, false) - require.NoError(t, err) - defer l.Stop() - - _, err = os.Stat(filepath.Join(positionsDir, "positions")) - require.NoError(t, err, "default shared positions directory did not get created") - _, err = os.Stat(filepath.Join(positionsDir, "other-positions")) - require.NoError(t, err, "instance-specific positions directory did not get created") -} diff --git a/internal/static/metrics/agent.go b/internal/static/metrics/agent.go index 0c2c745300..6ef123c1ec 100644 --- a/internal/static/metrics/agent.go +++ b/internal/static/metrics/agent.go @@ -7,29 +7,21 @@ import ( "errors" "flag" "fmt" - "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - "go.uber.org/atomic" - "google.golang.org/grpc" - "github.com/grafana/agent/internal/static/metrics/cluster" "github.com/grafana/agent/internal/static/metrics/cluster/client" "github.com/grafana/agent/internal/static/metrics/instance" "github.com/grafana/agent/internal/util" - "github.com/prometheus/prometheus/discovery" ) // DefaultConfig is the default settings for the Prometheus-lite client. var DefaultConfig = Config{ Global: instance.DefaultGlobalConfig, - InstanceRestartBackoff: instance.DefaultBasicManagerConfig.InstanceRestartBackoff, + InstanceRestartBackoff: 5 * time.Second, WALDir: "data-agent/", - WALCleanupAge: DefaultCleanupAge, - WALCleanupPeriod: DefaultCleanupPeriod, + WALCleanupAge: 12 * time.Hour, + WALCleanupPeriod: 30 * time.Minute, ServiceConfig: cluster.DefaultConfig, ServiceClientConfig: client.DefaultConfig, InstanceMode: instance.DefaultMode, @@ -123,257 +115,3 @@ func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { c.ServiceConfig.RegisterFlagsWithPrefix(prefix+"service.", f) c.ServiceClientConfig.RegisterFlagsWithPrefix(prefix, f) } - -// Agent is an agent for collecting Prometheus metrics. It acts as a -// Prometheus-lite; only running the service discovery, remote_write, and WAL -// components of Prometheus. It is broken down into a series of Instances, each -// of which perform metric collection. -type Agent struct { - mut sync.RWMutex - cfg Config - logger log.Logger - reg prometheus.Registerer - - // Store both the basic manager and the modal manager, so we can update their - // settings independently. Only the ModalManager should be used for mutating - // configs. - bm *instance.BasicManager - mm *instance.ModalManager - cleaner *WALCleaner - - instanceFactory instanceFactory - - cluster *cluster.Cluster - - stopped bool - stopOnce sync.Once - actor chan func() - - initialBootDone atomic.Bool -} - -// New creates and starts a new Agent. -func New(reg prometheus.Registerer, cfg Config, logger log.Logger) (*Agent, error) { - // This registers discovery metrics with the default registry which should be the reg specified above. 
- discovery.RegisterMetrics()
- return newAgent(reg, cfg, logger, defaultInstanceFactory)
-}
-
-func newAgent(reg prometheus.Registerer, cfg Config, logger log.Logger, fact instanceFactory) (*Agent, error) {
- a := &Agent{
- logger: log.With(logger, "agent", "prometheus"),
- instanceFactory: fact,
- reg: reg,
- actor: make(chan func(), 1),
- }
-
- a.bm = instance.NewBasicManager(instance.BasicManagerConfig{
- InstanceRestartBackoff: cfg.InstanceRestartBackoff,
- }, a.logger, a.newInstance)
-
- var err error
- a.mm, err = instance.NewModalManager(a.reg, a.logger, a.bm, cfg.InstanceMode)
- if err != nil {
- return nil, fmt.Errorf("failed to create modal instance manager: %w", err)
- }
-
- a.cluster, err = cluster.New(a.logger, reg, cfg.ServiceConfig, a.mm, a.Validate)
- if err != nil {
- return nil, err
- }
-
- if err := a.ApplyConfig(cfg); err != nil {
- return nil, err
- }
- go a.run()
- return a, nil
-}
-
-// newInstance creates a new Instance given a config.
-func (a *Agent) newInstance(c instance.Config) (instance.ManagedInstance, error) {
- a.mut.RLock()
- defer a.mut.RUnlock()
-
- // Controls the label used to identify the instance's metrics.
- instanceLabel := "instance_name"
- if a.cfg.InstanceMode == instance.ModeShared {
- instanceLabel = "instance_group_name"
- }
-
- reg := prometheus.WrapRegistererWith(prometheus.Labels{
- instanceLabel: c.Name,
- }, a.reg)
-
- return a.instanceFactory(reg, c, a.cfg.WALDir, a.logger)
-}
-
-// Validate will validate the incoming Config and mutate it to apply defaults.
-func (a *Agent) Validate(c *instance.Config) error {
- a.mut.RLock()
- defer a.mut.RUnlock()
-
- if a.cfg.WALDir == "" {
- return fmt.Errorf("no wal_directory configured")
- }
-
- if err := c.ApplyDefaults(a.cfg.Global); err != nil {
- return fmt.Errorf("failed to apply defaults to %q: %w", c.Name, err)
- }
- return nil
-}
-
-// ApplyConfig applies config changes to the Agent.
-func (a *Agent) ApplyConfig(cfg Config) error {
- a.mut.Lock()
- defer a.mut.Unlock()
-
- if util.CompareYAML(a.cfg, cfg) {
- return nil
- }
-
- if a.stopped {
- return fmt.Errorf("agent stopped")
- }
-
- // The ordering here is done to minimize the number of instances that need to
- // be restarted. We update components from lowest to highest level:
- //
- // 1. WAL Cleaner
- // 2. Basic manager
- // 3. Modal Manager
- // 4. Cluster
- // 5. Local configs
-
- if a.cleaner != nil {
- a.cleaner.Stop()
- a.cleaner = nil
- }
- if cfg.WALDir != "" {
- a.cleaner = NewWALCleaner(
- a.logger,
- a.mm,
- cfg.WALDir,
- cfg.WALCleanupAge,
- cfg.WALCleanupPeriod,
- )
- }
-
- a.bm.UpdateManagerConfig(instance.BasicManagerConfig{
- InstanceRestartBackoff: cfg.InstanceRestartBackoff,
- })
-
- if err := a.mm.SetMode(cfg.InstanceMode); err != nil {
- return err
- }
-
- if err := a.cluster.ApplyConfig(cfg.ServiceConfig); err != nil {
- return fmt.Errorf("failed to apply cluster config: %w", err)
- }
-
- // Queue an actor in the background to sync the instances. This is required
- // because both this function and newInstance grab the mutex.
- oldConfig := a.cfg
-
- a.actor <- func() {
- a.syncInstances(oldConfig, cfg)
- a.initialBootDone.Store(true)
- }
-
- a.cfg = cfg
- return nil
-}
-
-// syncInstances syncs the state of the instance manager to newConfig by
-// applying all configs from newConfig and deleting any configs from oldConfig
-// that are not in newConfig.
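The apply-then-prune reconciliation just described is small enough to show on its own. The sketch below is illustrative only: reconcile, apply, and remove are hypothetical stand-ins for the manager's ApplyConfig and DeleteConfig.

package main

import "fmt"

// reconcile applies every config in desired, then deletes anything in
// current that desired no longer mentions.
func reconcile(current, desired []string, apply, remove func(string)) {
	want := make(map[string]bool, len(desired))
	for _, name := range desired {
		want[name] = true
		apply(name)
	}
	for _, name := range current {
		if !want[name] {
			remove(name)
		}
	}
}

func main() {
	apply := func(n string) { fmt.Println("apply", n) }
	remove := func(n string) { fmt.Println("delete", n) }
	reconcile([]string{"a", "b"}, []string{"b", "c"}, apply, remove)
	// Output: apply b, apply c, delete a
}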
-func (a *Agent) syncInstances(oldConfig, newConfig Config) { - // Apply the new configs - for _, c := range newConfig.Configs { - if err := a.mm.ApplyConfig(c); err != nil { - level.Error(a.logger).Log("msg", "failed to apply config", "name", c.Name, "err", err) - } - } - - // Remove any configs from oldConfig that aren't in newConfig. - for _, oc := range oldConfig.Configs { - foundConfig := false - for _, nc := range newConfig.Configs { - if nc.Name == oc.Name { - foundConfig = true - break - } - } - if foundConfig { - continue - } - - if err := a.mm.DeleteConfig(oc.Name); err != nil { - level.Error(a.logger).Log("msg", "failed to delete old config", "name", oc.Name, "err", err) - } - } -} - -// run calls received actor functions in the background. -func (a *Agent) run() { - for f := range a.actor { - f() - } -} - -// Ready returns true if both the agent and all instances -// spawned by a Manager have completed startup. -func (a *Agent) Ready() bool { - // Wait for the initial load to complete so the instance manager has at least - // the base set of expected instances. - if !a.initialBootDone.Load() { - return false - } - - for _, inst := range a.mm.ListInstances() { - if !inst.Ready() { - return false - } - } - - return true -} - -// WireGRPC wires gRPC services into the provided server. -func (a *Agent) WireGRPC(s *grpc.Server) { - a.cluster.WireGRPC(s) -} - -// Config returns the configuration of this Agent. -func (a *Agent) Config() Config { return a.cfg } - -// InstanceManager returns the instance manager used by this Agent. -func (a *Agent) InstanceManager() instance.Manager { return a.mm } - -// Stop stops the agent and all its instances. -func (a *Agent) Stop() { - a.mut.Lock() - defer a.mut.Unlock() - - // Close the actor channel to stop run. - a.stopOnce.Do(func() { - close(a.actor) - }) - - a.cluster.Stop() - - if a.cleaner != nil { - a.cleaner.Stop() - } - - // Only need to stop the ModalManager, which will pass through everything to the - // BasicManager. 
- a.mm.Stop() - - a.stopped = true -} - -type instanceFactory = func(reg prometheus.Registerer, cfg instance.Config, walDir string, logger log.Logger) (instance.ManagedInstance, error) - -func defaultInstanceFactory(reg prometheus.Registerer, cfg instance.Config, walDir string, logger log.Logger) (instance.ManagedInstance, error) { - return instance.New(reg, cfg, walDir, logger) -} diff --git a/internal/static/metrics/agent_test.go b/internal/static/metrics/agent_test.go index bd311a07cd..2d1d063b20 100644 --- a/internal/static/metrics/agent_test.go +++ b/internal/static/metrics/agent_test.go @@ -1,22 +1,11 @@ package metrics import ( - "context" "errors" - "fmt" - "net/http" - "sync" "testing" - "time" - "github.com/go-kit/log" "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" "github.com/stretchr/testify/require" - "go.uber.org/atomic" "gopkg.in/yaml.v2" ) @@ -113,221 +102,6 @@ configs: require.Greater(t, int64(scrapeConfig.ScrapeInterval), int64(0)) } -func TestAgent(t *testing.T) { - // Launch two instances - cfg := Config{ - WALDir: "/tmp/wal", - Configs: []instance.Config{ - makeInstanceConfig("instance_a"), - makeInstanceConfig("instance_b"), - }, - InstanceRestartBackoff: time.Duration(0), - InstanceMode: instance.ModeDistinct, - } - - fact := newFakeInstanceFactory() - - a, err := newAgent(prometheus.NewRegistry(), cfg, log.NewNopLogger(), fact.factory) - require.NoError(t, err) - - util.Eventually(t, func(t require.TestingT) { - require.NotNil(t, fact.created) - require.Equal(t, 2, int(fact.created.Load())) - require.Equal(t, 2, len(a.mm.ListInstances())) - }) - - t.Run("instances should be running", func(t *testing.T) { - for _, mi := range fact.Mocks() { - // Each instance should have wait called on it - util.Eventually(t, func(t require.TestingT) { - require.True(t, mi.running.Load()) - }) - } - }) - - t.Run("instances should be restarted when stopped", func(t *testing.T) { - for _, mi := range fact.Mocks() { - util.Eventually(t, func(t require.TestingT) { - require.Equal(t, 1, int(mi.startedCount.Load())) - }) - } - - for _, mi := range fact.Mocks() { - mi.err <- fmt.Errorf("really bad error") - } - - for _, mi := range fact.Mocks() { - util.Eventually(t, func(t require.TestingT) { - require.Equal(t, 2, int(mi.startedCount.Load())) - }) - } - }) -} - -func TestAgent_NormalInstanceExits(t *testing.T) { - tt := []struct { - name string - simulateError error - }{ - {"no error", nil}, - {"context cancelled", context.Canceled}, - } - - cfg := Config{ - WALDir: "/tmp/wal", - Configs: []instance.Config{ - makeInstanceConfig("instance_a"), - makeInstanceConfig("instance_b"), - }, - InstanceRestartBackoff: time.Duration(0), - InstanceMode: instance.ModeDistinct, - } - - for _, tc := range tt { - t.Run(tc.name, func(t *testing.T) { - fact := newFakeInstanceFactory() - - a, err := newAgent(prometheus.NewRegistry(), cfg, log.NewNopLogger(), fact.factory) - require.NoError(t, err) - - util.Eventually(t, func(t require.TestingT) { - require.NotNil(t, fact.created) - require.Equal(t, 2, int(fact.created.Load())) - require.Equal(t, 2, len(a.mm.ListInstances())) - }) - for _, mi := range fact.Mocks() { - mi.err <- tc.simulateError - } - - time.Sleep(time.Millisecond * 100) - - // Get the new total amount of instances starts; value should - // be unchanged. 
- var startedCount int64 - for _, i := range fact.Mocks() { - startedCount += i.startedCount.Load() - } - - // There should only be two instances that started. If there's more, something - // restarted despite our error. - require.Equal(t, int64(2), startedCount, "instances should not have restarted") - }) - } -} - -func TestAgent_Stop(t *testing.T) { - // Launch two instances - cfg := Config{ - WALDir: "/tmp/wal", - Configs: []instance.Config{ - makeInstanceConfig("instance_a"), - makeInstanceConfig("instance_b"), - }, - InstanceRestartBackoff: time.Duration(0), - InstanceMode: instance.ModeDistinct, - } - - fact := newFakeInstanceFactory() - - a, err := newAgent(prometheus.NewRegistry(), cfg, log.NewNopLogger(), fact.factory) - require.NoError(t, err) - - util.Eventually(t, func(t require.TestingT) { - require.NotNil(t, fact.created) - require.Equal(t, 2, int(fact.created.Load())) - require.Equal(t, 2, len(a.mm.ListInstances())) - }) - - a.Stop() - - time.Sleep(time.Millisecond * 100) - - for _, mi := range fact.Mocks() { - require.False(t, mi.running.Load(), "instance should not have been restarted") - } -} - -type fakeInstance struct { - cfg instance.Config - - err chan error - startedCount *atomic.Int64 - running *atomic.Bool -} - -func (i *fakeInstance) Run(ctx context.Context) error { - i.startedCount.Inc() - i.running.Store(true) - defer i.running.Store(false) - - select { - case <-ctx.Done(): - return ctx.Err() - case err := <-i.err: - return err - } -} - -func (i *fakeInstance) Ready() bool { - return true -} - -func (i *fakeInstance) Update(_ instance.Config) error { - return instance.ErrInvalidUpdate{ - Inner: fmt.Errorf("can't dynamically update fakeInstance"), - } -} - -func (i *fakeInstance) TargetsActive() map[string][]*scrape.Target { - return nil -} - -func (i *fakeInstance) StorageDirectory() string { - return "" -} - -func (i *fakeInstance) WriteHandler() http.Handler { - return nil -} - -func (i *fakeInstance) Appender(ctx context.Context) storage.Appender { - return nil -} - -type fakeInstanceFactory struct { - mut sync.Mutex - mocks []*fakeInstance - - created *atomic.Int64 -} - -func newFakeInstanceFactory() *fakeInstanceFactory { - return &fakeInstanceFactory{created: atomic.NewInt64(0)} -} - -func (f *fakeInstanceFactory) Mocks() []*fakeInstance { - f.mut.Lock() - defer f.mut.Unlock() - return f.mocks -} - -func (f *fakeInstanceFactory) factory(_ prometheus.Registerer, cfg instance.Config, _ string, _ log.Logger) (instance.ManagedInstance, error) { - f.created.Add(1) - - f.mut.Lock() - defer f.mut.Unlock() - - inst := &fakeInstance{ - cfg: cfg, - running: atomic.NewBool(false), - startedCount: atomic.NewInt64(0), - err: make(chan error), - } - - f.mocks = append(f.mocks, inst) - return inst, nil -} - func makeInstanceConfig(name string) instance.Config { cfg := instance.DefaultConfig cfg.Name = name diff --git a/internal/static/metrics/cleaner.go b/internal/static/metrics/cleaner.go deleted file mode 100644 index 0bf577a5b6..0000000000 --- a/internal/static/metrics/cleaner.go +++ /dev/null @@ -1,271 +0,0 @@ -package metrics - -import ( - "fmt" - "os" - "path/filepath" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/metrics/wal" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - promwal "github.com/prometheus/prometheus/tsdb/wlog" -) - -// Default settings for the WAL cleaner. 
-const ( - DefaultCleanupAge = 12 * time.Hour - DefaultCleanupPeriod = 30 * time.Minute -) - -var ( - discoveryError = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: "agent_metrics_cleaner_storage_error_total", - Help: "Errors encountered discovering local storage paths", - }, - []string{"storage"}, - ) - - segmentError = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: "agent_metrics_cleaner_segment_error_total", - Help: "Errors encountered finding most recent WAL segments", - }, - []string{"storage"}, - ) - - managedStorage = promauto.NewGauge( - prometheus.GaugeOpts{ - Name: "agent_metrics_cleaner_managed_storage", - Help: "Number of storage directories associated with managed instances", - }, - ) - - abandonedStorage = promauto.NewGauge( - prometheus.GaugeOpts{ - Name: "agent_metrics_cleaner_abandoned_storage", - Help: "Number of storage directories not associated with any managed instance", - }, - ) - - cleanupRunsSuccess = promauto.NewCounter( - prometheus.CounterOpts{ - Name: "agent_metrics_cleaner_success_total", - Help: "Number of successfully removed abandoned WALs", - }, - ) - - cleanupRunsErrors = promauto.NewCounter( - prometheus.CounterOpts{ - Name: "agent_metrics_cleaner_errors_total", - Help: "Number of errors removing abandoned WALs", - }, - ) - - cleanupTimes = promauto.NewHistogram( - prometheus.HistogramOpts{ - Name: "agent_metrics_cleaner_cleanup_seconds", - Help: "Time spent performing each periodic WAL cleanup", - }, - ) -) - -// lastModifiedFunc gets the last modified time of the most recent segment of a WAL -type lastModifiedFunc func(path string) (time.Time, error) - -func lastModified(path string) (time.Time, error) { - existing, err := promwal.Open(nil, path) - if err != nil { - return time.Time{}, err - } - - // We don't care if there are errors closing the abandoned WAL - defer func() { _ = existing.Close() }() - - _, last, err := promwal.Segments(existing.Dir()) - if err != nil { - return time.Time{}, fmt.Errorf("unable to open WAL: %w", err) - } - - if last == -1 { - return time.Time{}, fmt.Errorf("unable to determine most recent segment for %s", path) - } - - // full path to the most recent segment in this WAL - lastSegment := promwal.SegmentName(path, last) - segmentFile, err := os.Stat(lastSegment) - if err != nil { - return time.Time{}, fmt.Errorf("unable to determine mtime for %s segment: %w", lastSegment, err) - } - - return segmentFile.ModTime(), nil -} - -// WALCleaner periodically checks for Write Ahead Logs (WALs) that are not associated -// with any active instance.ManagedInstance and have not been written to in some configured -// amount of time and deletes them. -type WALCleaner struct { - logger log.Logger - instanceManager instance.Manager - walDirectory string - walLastModified lastModifiedFunc - minAge time.Duration - period time.Duration - done chan bool -} - -// NewWALCleaner creates a new cleaner that looks for abandoned WALs in the given -// directory and removes them if they haven't been modified in over minAge. 
It also starts
-// a goroutine that periodically runs the cleanup method in a loop.
-func NewWALCleaner(logger log.Logger, manager instance.Manager, walDirectory string, minAge time.Duration, period time.Duration) *WALCleaner {
- c := &WALCleaner{
- logger: log.With(logger, "component", "cleaner"),
- instanceManager: manager,
- walDirectory: filepath.Clean(walDirectory),
- walLastModified: lastModified,
- minAge: DefaultCleanupAge,
- period: DefaultCleanupPeriod,
- done: make(chan bool),
- }
-
- if minAge > 0 {
- c.minAge = minAge
- }
-
- // We allow a period of 0 here because '0' means "don't run the task". This
- // is handled by not running a ticker at all in the run method.
- if period >= 0 {
- c.period = period
- }
-
- go c.run()
- return c
-}
-
-// getManagedStorage gets storage directories used for each ManagedInstance
-func (c *WALCleaner) getManagedStorage(instances map[string]instance.ManagedInstance) map[string]bool {
- out := make(map[string]bool)
-
- for _, inst := range instances {
- out[inst.StorageDirectory()] = true
- }
-
- return out
-}
-
-// getAllStorage gets all storage directories under walDirectory
-func (c *WALCleaner) getAllStorage() []string {
- var out []string
-
- _ = filepath.Walk(c.walDirectory, func(p string, info os.FileInfo, err error) error {
- if os.IsNotExist(err) {
- // The root WAL directory doesn't exist. Maybe this Agent isn't responsible for any
- // instances yet. Log at debug since this isn't a big deal. We'll just try to crawl
- // the directory again on the next periodic run.
- level.Debug(c.logger).Log("msg", "WAL storage path does not exist", "path", p, "err", err)
- } else if err != nil {
- // Just log any errors traversing the WAL directory. This will potentially result
- // in a WAL (that has incorrect permissions or some similar problem) not being cleaned
- // up. This is better than preventing *all* other WALs from being cleaned up.
- discoveryError.WithLabelValues(p).Inc()
- level.Warn(c.logger).Log("msg", "unable to traverse WAL storage path", "path", p, "err", err)
- } else if info.IsDir() && filepath.Dir(p) == c.walDirectory {
- // A single level below the root are instance storage directories (including WALs).
- out = append(out, p)
- }
-
- return nil
- })
-
- return out
-}
-
-// getAbandonedStorage gets the full path of storage directories that aren't associated with
-// an active instance and haven't been written to within a configured duration (usually several
-// hours or more).
-func (c *WALCleaner) getAbandonedStorage(all []string, managed map[string]bool, now time.Time) []string {
- var out []string
-
- for _, dir := range all {
- if managed[dir] {
- level.Debug(c.logger).Log("msg", "active WAL", "name", dir)
- continue
- }
-
- walDir := wal.SubDirectory(dir)
- mtime, err := c.walLastModified(walDir)
- if err != nil {
- segmentError.WithLabelValues(dir).Inc()
- level.Warn(c.logger).Log("msg", "unable to find segment mtime of WAL", "name", dir, "err", err)
- continue
- }
-
- diff := now.Sub(mtime)
- if diff > c.minAge {
- // The last segment for this WAL was modified more than $minAge (positive number of hours)
- // in the past. This makes it a candidate for deletion since it's also not associated with
- // any Instances this agent knows about.
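- // For example, with the default minAge of 12 hours, a WAL whose newest
- // segment was last written 30 hours ago yields diff = 30h > 12h and is
- // collected below.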
- out = append(out, dir) - } - - level.Debug(c.logger).Log("msg", "abandoned WAL", "name", dir, "mtime", mtime, "diff", diff) - } - - return out -} - -// run cleans up abandoned WALs (if period != 0) in a loop periodically until stopped -func (c *WALCleaner) run() { - // A period of 0 means don't run a cleanup task - if c.period == 0 { - return - } - - ticker := time.NewTicker(c.period) - defer ticker.Stop() - - for { - select { - case <-c.done: - level.Debug(c.logger).Log("msg", "stopping cleaner...") - return - case <-ticker.C: - c.cleanup() - } - } -} - -// cleanup removes any abandoned and unused WAL directories. Note that it shouldn't be -// necessary to call this method explicitly in most cases since it will be run periodically -// in a goroutine (started when WALCleaner is created). -func (c *WALCleaner) cleanup() { - start := time.Now() - all := c.getAllStorage() - managed := c.getManagedStorage(c.instanceManager.ListInstances()) - abandoned := c.getAbandonedStorage(all, managed, time.Now()) - - managedStorage.Set(float64(len(managed))) - abandonedStorage.Set(float64(len(abandoned))) - - for _, a := range abandoned { - level.Info(c.logger).Log("msg", "deleting abandoned WAL", "name", a) - err := os.RemoveAll(a) - if err != nil { - level.Error(c.logger).Log("msg", "failed to delete abandoned WAL", "name", a, "err", err) - cleanupRunsErrors.Inc() - } else { - cleanupRunsSuccess.Inc() - } - } - - cleanupTimes.Observe(time.Since(start).Seconds()) -} - -// Stop the cleaner and any background tasks running -func (c *WALCleaner) Stop() { - close(c.done) -} diff --git a/internal/static/metrics/cleaner_test.go b/internal/static/metrics/cleaner_test.go deleted file mode 100644 index f8aeac7fa7..0000000000 --- a/internal/static/metrics/cleaner_test.go +++ /dev/null @@ -1,146 +0,0 @@ -package metrics - -import ( - "os" - "path/filepath" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/stretchr/testify/require" -) - -func TestWALCleaner_getAllStorageNoRoot(t *testing.T) { - walRoot := filepath.Join(os.TempDir(), "getAllStorageNoRoot") - logger := log.NewLogfmtLogger(os.Stderr) - cleaner := NewWALCleaner( - logger, - &instance.MockManager{}, - walRoot, - DefaultCleanupAge, - DefaultCleanupPeriod, - ) - - // Bogus WAL root that doesn't exist. 
Method should return no results - wals := cleaner.getAllStorage() - - require.Empty(t, wals) -} - -func TestWALCleaner_getAllStorageSuccess(t *testing.T) { - walRoot := t.TempDir() - - walDir := filepath.Join(walRoot, "instance-1") - err := os.MkdirAll(walDir, 0755) - require.NoError(t, err) - - logger := log.NewLogfmtLogger(os.Stderr) - cleaner := NewWALCleaner( - logger, - &instance.MockManager{}, - walRoot, - DefaultCleanupAge, - DefaultCleanupPeriod, - ) - wals := cleaner.getAllStorage() - - require.Equal(t, []string{walDir}, wals) -} - -func TestWALCleaner_getAbandonedStorageBeforeCutoff(t *testing.T) { - walRoot := t.TempDir() - - walDir := filepath.Join(walRoot, "instance-1") - err := os.MkdirAll(walDir, 0755) - require.NoError(t, err) - - all := []string{walDir} - managed := make(map[string]bool) - now := time.Now() - - logger := log.NewLogfmtLogger(os.Stderr) - cleaner := NewWALCleaner( - logger, - &instance.MockManager{}, - walRoot, - 5*time.Minute, - DefaultCleanupPeriod, - ) - - cleaner.walLastModified = func(path string) (time.Time, error) { - return now, nil - } - - // Last modification time on our WAL directory is the same as "now" - // so there shouldn't be any results even though it's not part of the - // set of "managed" directories. - abandoned := cleaner.getAbandonedStorage(all, managed, now) - require.Empty(t, abandoned) -} - -func TestWALCleaner_getAbandonedStorageAfterCutoff(t *testing.T) { - walRoot := t.TempDir() - - walDir := filepath.Join(walRoot, "instance-1") - err := os.MkdirAll(walDir, 0755) - require.NoError(t, err) - - all := []string{walDir} - managed := make(map[string]bool) - now := time.Now() - - logger := log.NewLogfmtLogger(os.Stderr) - cleaner := NewWALCleaner( - logger, - &instance.MockManager{}, - walRoot, - 5*time.Minute, - DefaultCleanupPeriod, - ) - - cleaner.walLastModified = func(path string) (time.Time, error) { - return now.Add(-30 * time.Minute), nil - } - - // Last modification time on our WAL directory is 30 minutes in the past - // compared to "now" and we've set the cutoff for our cleaner to be 5 - // minutes: our WAL directory should show up as abandoned - abandoned := cleaner.getAbandonedStorage(all, managed, now) - require.Equal(t, []string{walDir}, abandoned) -} - -func TestWALCleaner_cleanup(t *testing.T) { - walRoot := t.TempDir() - - walDir := filepath.Join(walRoot, "instance-1") - err := os.MkdirAll(walDir, 0755) - require.NoError(t, err) - - now := time.Now() - logger := log.NewLogfmtLogger(os.Stderr) - manager := &instance.MockManager{} - manager.ListInstancesFunc = func() map[string]instance.ManagedInstance { - return make(map[string]instance.ManagedInstance) - } - - cleaner := NewWALCleaner( - logger, - manager, - walRoot, - 5*time.Minute, - DefaultCleanupPeriod, - ) - - cleaner.walLastModified = func(path string) (time.Time, error) { - return now.Add(-30 * time.Minute), nil - } - - // Last modification time on our WAL directory is 30 minutes in the past - // compared to "now" and we've set the cutoff for our cleaner to be 5 - // minutes: our WAL directory should be removed since it's abandoned - cleaner.cleanup() - _, err = os.Stat(walDir) - require.Error(t, err) - require.True(t, os.IsNotExist(err)) -} diff --git a/internal/static/metrics/cluster/client/client.go b/internal/static/metrics/cluster/client/client.go index 1b90feb99f..b4180ab3b0 100644 --- a/internal/static/metrics/cluster/client/client.go +++ b/internal/static/metrics/cluster/client/client.go @@ -2,25 +2,12 @@ package client import ( "flag" - "io" "reflect" - 
"github.com/grafana/agent/internal/static/agentproto" "github.com/grafana/agent/internal/util" "github.com/grafana/dskit/grpcclient" - "github.com/grafana/dskit/middleware" - otgrpc "github.com/opentracing-contrib/go-grpc" - "github.com/opentracing/opentracing-go" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" ) -// ScrapingServiceClient wraps agentproto.ScrapingServiceClient with a Close method. -type ScrapingServiceClient interface { - agentproto.ScrapingServiceClient - io.Closer -} - var ( // DefaultConfig provides default Config values. DefaultConfig = *util.DefaultConfigFromFlags(&Config{}).(*Config) @@ -54,40 +41,3 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { c.GRPCClientConfig.RegisterFlagsWithPrefix(prefix+"service-client", f) } - -// New returns a new scraping service client. -func New(cfg Config, addr string) (ScrapingServiceClient, error) { - opts := []grpc.DialOption{ - grpc.WithTransportCredentials(insecure.NewCredentials()), - grpc.WithDefaultCallOptions(cfg.GRPCClientConfig.CallOptions()...), - } - grpcDialOpts, err := cfg.GRPCClientConfig.DialOption(instrumentation()) - if err != nil { - return nil, err - } - opts = append(opts, grpcDialOpts...) - conn, err := grpc.Dial(addr, opts...) - if err != nil { - return nil, err - } - - return struct { - agentproto.ScrapingServiceClient - io.Closer - }{ - ScrapingServiceClient: agentproto.NewScrapingServiceClient(conn), - Closer: conn, - }, nil -} - -func instrumentation() ([]grpc.UnaryClientInterceptor, []grpc.StreamClientInterceptor) { - unary := []grpc.UnaryClientInterceptor{ - otgrpc.OpenTracingClientInterceptor(opentracing.GlobalTracer()), - middleware.ClientUserHeaderInterceptor, - } - stream := []grpc.StreamClientInterceptor{ - otgrpc.OpenTracingStreamClientInterceptor(opentracing.GlobalTracer()), - middleware.StreamClientUserHeaderInterceptor, - } - return unary, stream -} diff --git a/internal/static/metrics/cluster/cluster.go b/internal/static/metrics/cluster/cluster.go deleted file mode 100644 index 9ab498f7e4..0000000000 --- a/internal/static/metrics/cluster/cluster.go +++ /dev/null @@ -1,179 +0,0 @@ -package cluster - -import ( - "context" - "fmt" - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/golang/protobuf/ptypes/empty" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/static/agentproto" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/metrics/instance/configstore" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus" - "google.golang.org/grpc" -) - -// Cluster connects an Agent to other Agents and allows them to distribute -// workload. -type Cluster struct { - mut sync.RWMutex - - log log.Logger - cfg Config - baseValidation ValidationFunc - - // - // Internally, Cluster glues together four separate pieces of logic. - // See comments below to get an understanding of what is going on. - // - - // node manages membership in the cluster and performs cluster-wide reshards. - node *node - - // store connects to a configstore for changes. storeAPI is an HTTP API for it. - store *configstore.Remote - storeAPI *configstore.API - - // watcher watches the store and applies changes to an instance.Manager, - // triggering metrics to be collected and sent. configWatcher also does a - // complete refresh of its state on an interval. 
- watcher *configWatcher -} - -// New creates a new Cluster. -func New( - l log.Logger, - reg prometheus.Registerer, - cfg Config, - im instance.Manager, - validate ValidationFunc, -) (*Cluster, error) { - - l = log.With(l, "component", "cluster") - - var ( - c = &Cluster{log: l, cfg: cfg, baseValidation: validate} - err error - ) - - // Hold the lock for the initialization. This is necessary since newNode will - // eventually call Reshard, and we want c.watcher to be initialized when that - // happens. - c.mut.Lock() - defer c.mut.Unlock() - - c.node, err = newNode(reg, l, cfg, c) - if err != nil { - return nil, fmt.Errorf("failed to initialize node membership: %w", err) - } - - c.store, err = configstore.NewRemote(l, reg, cfg.KVStore.Config, cfg.Enabled) - if err != nil { - return nil, fmt.Errorf("failed to initialize configstore: %w", err) - } - c.storeAPI = configstore.NewAPI(l, c.store, c.storeValidate, cfg.APIEnableGetConfiguration) - reg.MustRegister(c.storeAPI) - - c.watcher, err = newConfigWatcher(l, cfg, c.store, im, c.node.Owns, validate) - if err != nil { - return nil, fmt.Errorf("failed to initialize configwatcher: %w", err) - } - - // NOTE(rfratto): ApplyConfig isn't necessary for the initialization but must - // be called for any changes to the configuration. - return c, nil -} - -func (c *Cluster) storeValidate(cfg *instance.Config) error { - c.mut.RLock() - defer c.mut.RUnlock() - - if err := c.baseValidation(cfg); err != nil { - return err - } - - if c.cfg.DangerousAllowReadingFiles { - return nil - } - - // If configs aren't allowed to read from the store, we need to make sure no - // configs coming in from the API set files for passwords. - return validateNofiles(cfg) -} - -// Reshard implements agentproto.ScrapingServiceServer, and syncs the state of -// configs with the configstore. -func (c *Cluster) Reshard(ctx context.Context, _ *agentproto.ReshardRequest) (*empty.Empty, error) { - c.mut.RLock() - defer c.mut.RUnlock() - - level.Info(c.log).Log("msg", "received reshard notification, requesting refresh") - c.watcher.RequestRefresh() - return &empty.Empty{}, nil -} - -// ApplyConfig applies configuration changes to Cluster. -func (c *Cluster) ApplyConfig(cfg Config) error { - c.mut.Lock() - defer c.mut.Unlock() - - if util.CompareYAML(c.cfg, cfg) { - return nil - } - - if err := c.node.ApplyConfig(cfg); err != nil { - return fmt.Errorf("failed to apply config to node membership: %w", err) - } - - if err := c.store.ApplyConfig(cfg.Lifecycler.RingConfig.KVStore, cfg.Enabled); err != nil { - return fmt.Errorf("failed to apply config to config store: %w", err) - } - - if err := c.watcher.ApplyConfig(cfg); err != nil { - return fmt.Errorf("failed to apply config to watcher: %w", err) - } - - c.cfg = cfg - - // Force a refresh so all the configs get updated with new defaults. - level.Info(c.log).Log("msg", "cluster config changed, queueing refresh") - c.watcher.RequestRefresh() - return nil -} - -// WireAPI injects routes into the provided mux router for the config -// management API. -func (c *Cluster) WireAPI(r *mux.Router) { - c.storeAPI.WireAPI(r) - c.node.WireAPI(r) -} - -// WireGRPC injects gRPC server handlers into the provided gRPC server. -func (c *Cluster) WireGRPC(srv *grpc.Server) { - agentproto.RegisterScrapingServiceServer(srv, c) -} - -// Stop stops the cluster and all of its dependencies. 
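- // The node is stopped first so it leaves the ring and peers stop routing
- // work here before the store and watcher are torn down.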
-func (c *Cluster) Stop() {
- c.mut.Lock()
- defer c.mut.Unlock()
-
- deps := []struct {
- name string
- closer func() error
- }{
- {"node", c.node.Stop},
- {"config store", c.store.Close},
- {"config watcher", c.watcher.Stop},
- }
- for _, dep := range deps {
- err := dep.closer()
- if err != nil {
- level.Error(c.log).Log("msg", "failed to stop dependency", "dependency", dep.name, "err", err)
- }
- }
-}
diff --git a/internal/static/metrics/cluster/config_watcher.go b/internal/static/metrics/cluster/config_watcher.go
deleted file mode 100644
index 2544975c8d..0000000000
--- a/internal/static/metrics/cluster/config_watcher.go
+++ /dev/null
@@ -1,340 +0,0 @@
-package cluster
-
-import (
- "context"
- "fmt"
- "sync"
- "time"
-
- "github.com/go-kit/log"
- "github.com/go-kit/log/level"
- "github.com/grafana/agent/internal/static/metrics/instance"
- "github.com/grafana/agent/internal/static/metrics/instance/configstore"
- "github.com/grafana/agent/internal/util"
- "github.com/prometheus/client_golang/prometheus"
- "github.com/prometheus/client_golang/prometheus/promauto"
-)
-
-var (
- reshardDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
- Name: "agent_metrics_scraping_service_reshard_duration",
- Help: "How long it took for resharding to run.",
- }, []string{"success"})
-)
-
-// configWatcher connects to a configstore and will apply configs to an
-// instance.Manager.
-type configWatcher struct {
- log log.Logger
-
- mut sync.Mutex
- cfg Config
- stopped bool
- stop context.CancelFunc
-
- store configstore.Store
- im instance.Manager
- owns OwnershipFunc
- validate ValidationFunc
-
- refreshCh chan struct{}
- instanceMut sync.Mutex
- instances map[string]struct{}
-}
-
-// OwnershipFunc should determine if a given key is owned by the caller.
-type OwnershipFunc = func(key string) (bool, error)
-
-// ValidationFunc should validate a config.
-type ValidationFunc = func(*instance.Config) error
-
-// newConfigWatcher watches store for changes and checks each config against
-// owns. It will also poll the configstore at a configurable interval.
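OwnershipFunc is the watcher's pluggable sharding policy. A self-contained sketch of one such policy follows; the hash-mod scheme here is purely hypothetical (the agent's real policy asks a dskit hash ring), but it shows the shape a caller plugs in.

package main

import (
	"fmt"
	"hash/fnv"
)

// ownershipFunc mirrors the OwnershipFunc alias above: given a config key,
// report whether this node should run it.
type ownershipFunc func(key string) (bool, error)

// modOwner hashes the key and keeps it when it lands on our shard.
func modOwner(shard, totalShards uint32) ownershipFunc {
	return func(key string) (bool, error) {
		h := fnv.New32a()
		_, _ = h.Write([]byte(key))
		return h.Sum32()%totalShards == shard, nil
	}
}

func main() {
	owns := modOwner(0, 2)
	for _, key := range []string{"alpha", "beta", "gamma"} {
		ok, _ := owns(key)
		fmt.Println(key, ok)
	}
}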
-func newConfigWatcher(log log.Logger, cfg Config, store configstore.Store, im instance.Manager, owns OwnershipFunc, validate ValidationFunc) (*configWatcher, error) {
- ctx, cancel := context.WithCancel(context.Background())
-
- w := &configWatcher{
- log: log,
-
- stop: cancel,
-
- store: store,
- im: im,
- owns: owns,
- validate: validate,
-
- refreshCh: make(chan struct{}, 1),
- instances: make(map[string]struct{}),
- }
- if err := w.ApplyConfig(cfg); err != nil {
- return nil, err
- }
- // Delay startup to prevent a race condition; see run for details.
- delay := cfg.Lifecycler.HeartbeatPeriod * 3
- go w.run(ctx, delay)
- return w, nil
-}
-
-func (w *configWatcher) ApplyConfig(cfg Config) error {
- w.mut.Lock()
- defer w.mut.Unlock()
-
- if util.CompareYAML(w.cfg, cfg) {
- return nil
- }
-
- if w.stopped {
- return fmt.Errorf("configWatcher already stopped")
- }
-
- w.cfg = cfg
- return nil
-}
-
-func (w *configWatcher) run(ctx context.Context, delay time.Duration) {
- defer level.Info(w.log).Log("msg", "config watcher run loop exiting")
- // This is due to a race condition between the heartbeat and config ring in a very narrow set of circumstances
- // https://gist.github.com/mattdurham/c15f27de17a6da97bf2e6a870991c7f2
- time.Sleep(delay)
- lastReshard := time.Now()
-
- for {
- select {
- case <-ctx.Done():
- return
- case <-w.nextReshard(lastReshard):
- level.Debug(w.log).Log("msg", "reshard timer ticked, scheduling refresh")
- w.RequestRefresh()
- lastReshard = time.Now()
- case <-w.refreshCh:
- err := w.refresh(ctx)
- if err != nil {
- level.Error(w.log).Log("msg", "refresh failed", "err", err)
- }
- case ev := <-w.store.Watch():
- level.Debug(w.log).Log("msg", "handling event from config store")
- if err := w.handleEvent(ev); err != nil {
- level.Error(w.log).Log("msg", "failed to handle changed or deleted config", "key", ev.Key, "err", err)
- }
- }
- }
-}
-
-// nextReshard returns a channel that receives a value once the reshard
-// interval has elapsed.
-func (w *configWatcher) nextReshard(lastReshard time.Time) <-chan time.Time {
- w.mut.Lock()
- nextReshard := lastReshard.Add(w.cfg.ReshardInterval)
- w.mut.Unlock()
-
- remaining := time.Until(nextReshard)
-
- // NOTE(rfratto): clamping to 0 isn't necessary for time.After,
- // but it makes the log message clearer to always use "0s" as
- // "next reshard will be scheduled immediately."
- if remaining < 0 {
- remaining = 0
- }
-
- level.Debug(w.log).Log("msg", "waiting for next reshard interval", "last_reshard", lastReshard, "next_reshard", nextReshard, "remaining", remaining)
- return time.After(remaining)
-}
-
-// RequestRefresh will queue a refresh. No more than one refresh can be queued at a time.
-func (w *configWatcher) RequestRefresh() {
- select {
- case w.refreshCh <- struct{}{}:
- level.Debug(w.log).Log("msg", "successfully scheduled a refresh")
- default:
- level.Debug(w.log).Log("msg", "ignoring request refresh: refresh already scheduled")
- }
-}
-
-// refresh reloads all configs from the configstore. Deleted configs will be
-// removed. refresh may not be called concurrently and must only be invoked from run.
-// Call RequestRefresh to queue a call to refresh.
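RequestRefresh relies on a buffered channel of size one to coalesce bursts of refresh requests into a single pending one. A runnable reduction of that idea, with hypothetical names:

package main

import "fmt"

// requestRefresh schedules at most one pending refresh: if the buffer is
// full, a refresh is already queued and this request coalesces into it.
func requestRefresh(refreshCh chan struct{}) bool {
	select {
	case refreshCh <- struct{}{}:
		return true
	default:
		return false
	}
}

func main() {
	ch := make(chan struct{}, 1)
	fmt.Println(requestRefresh(ch)) // true: scheduled
	fmt.Println(requestRefresh(ch)) // false: coalesced with the pending one
	<-ch                            // the run loop drains the request...
	fmt.Println(requestRefresh(ch)) // ...and a new one can be queued again
}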
-func (w *configWatcher) refresh(ctx context.Context) (err error) { - w.mut.Lock() - enabled := w.cfg.Enabled - refreshTimeout := w.cfg.ReshardTimeout - w.mut.Unlock() - - if !enabled { - level.Debug(w.log).Log("msg", "refresh skipped because clustering is disabled") - return nil - } - level.Info(w.log).Log("msg", "starting refresh") - - if refreshTimeout > 0 { - var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, refreshTimeout) - defer cancel() - } - - start := time.Now() - defer func() { - success := "1" - if err != nil { - success = "0" - } - duration := time.Since(start) - level.Info(w.log).Log("msg", "refresh finished", "duration", duration, "success", success, "err", err) - reshardDuration.WithLabelValues(success).Observe(duration.Seconds()) - }() - - // This is used to determine if the context was already exceeded before calling the kv provider - if err = ctx.Err(); err != nil { - level.Error(w.log).Log("msg", "context deadline exceeded before calling store.all", "err", err) - return err - } - deadline, _ := ctx.Deadline() - level.Debug(w.log).Log("msg", "deadline before store.all", "deadline", deadline) - configs, err := w.store.All(ctx, func(key string) bool { - owns, err := w.owns(key) - if err != nil { - level.Error(w.log).Log("msg", "failed to check for ownership, instance will be deleted if it is running", "key", key, "err", err) - return false - } - return owns - }) - level.Debug(w.log).Log("msg", "count of configs from store.all", "count", len(configs)) - - if err != nil { - return fmt.Errorf("failed to get configs from store: %w", err) - } - - var ( - keys = make(map[string]struct{}) - firstError error - ) - -Outer: - for { - select { - case <-ctx.Done(): - return ctx.Err() - case cfg, ok := <-configs: - // w.store.All will close configs when all of them have been read. - if !ok { - break Outer - } - - if err := w.handleEvent(configstore.WatchEvent{Key: cfg.Name, Config: &cfg}); err != nil { - level.Error(w.log).Log("msg", "failed to process changed config", "key", cfg.Name, "err", err) - if firstError == nil { - firstError = err - } - } - - keys[cfg.Name] = struct{}{} - } - } - - // Any config we used to be running that disappeared from this most recent - // iteration should be deleted. We hold the lock just for the duration of - // populating deleted because handleEvent also grabs a hold on the lock. - var deleted []string - w.instanceMut.Lock() - for key := range w.instances { - if _, exist := keys[key]; exist { - continue - } - deleted = append(deleted, key) - } - w.instanceMut.Unlock() - - // Send a deleted event for any key that has gone away. - for _, key := range deleted { - if err := w.handleEvent(configstore.WatchEvent{Key: key, Config: nil}); err != nil { - level.Error(w.log).Log("msg", "failed to process changed config", "key", key, "err", err) - } - } - - return firstError -} - -func (w *configWatcher) handleEvent(ev configstore.WatchEvent) error { - w.mut.Lock() - defer w.mut.Unlock() - - if w.stopped { - return fmt.Errorf("configWatcher stopped") - } - - w.instanceMut.Lock() - defer w.instanceMut.Unlock() - - owned, err := w.owns(ev.Key) - if err != nil { - level.Error(w.log).Log("msg", "failed to see if config is owned. instance will be deleted if it is running", "err", err) - } - - var ( - _, isRunning = w.instances[ev.Key] - isDeleted = ev.Config == nil - ) - - switch { - // Two deletion scenarios: - // 1. A config we're running got moved to a new owner. - // 2. 
A config we're running got deleted - case (isRunning && !owned) || (isDeleted && isRunning): - if isDeleted { - level.Info(w.log).Log("msg", "untracking deleted config", "key", ev.Key) - } else { - level.Info(w.log).Log("msg", "untracking config that changed owners", "key", ev.Key) - } - - err := w.im.DeleteConfig(ev.Key) - delete(w.instances, ev.Key) - if err != nil { - return fmt.Errorf("failed to delete: %w", err) - } - - case !isDeleted && owned: - if err := w.validate(ev.Config); err != nil { - return fmt.Errorf( - "failed to validate config. %[1]s cannot run until the global settings are adjusted or the config is adjusted to operate within the global constraints. error: %[2]w", - ev.Key, err, - ) - } - - if _, exist := w.instances[ev.Key]; !exist { - level.Info(w.log).Log("msg", "tracking new config", "key", ev.Key) - } - - if err := w.im.ApplyConfig(*ev.Config); err != nil { - return fmt.Errorf("failed to apply config: %w", err) - } - w.instances[ev.Key] = struct{}{} - } - - return nil -} - -// Stop stops the configWatcher. Cannot be called more than once. -func (w *configWatcher) Stop() error { - w.mut.Lock() - defer w.mut.Unlock() - - if w.stopped { - return fmt.Errorf("already stopped") - } - w.stop() - w.stopped = true - - // Shut down all the instances that this configWatcher managed. It *MUST* - // happen after w.stop() is called to prevent the run loop from applying any - // new configs. - w.instanceMut.Lock() - defer w.instanceMut.Unlock() - - for key := range w.instances { - if err := w.im.DeleteConfig(key); err != nil { - level.Warn(w.log).Log("msg", "failed deleting config on shutdown", "key", key, "err", err) - } - } - w.instances = make(map[string]struct{}) - - return nil -} diff --git a/internal/static/metrics/cluster/config_watcher_test.go b/internal/static/metrics/cluster/config_watcher_test.go deleted file mode 100644 index e91bffe5d8..0000000000 --- a/internal/static/metrics/cluster/config_watcher_test.go +++ /dev/null @@ -1,267 +0,0 @@ -package cluster - -import ( - "context" - "testing" - "time" - - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/metrics/instance/configstore" - "github.com/grafana/agent/internal/util" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" -) - -func Test_configWatcher_Refresh(t *testing.T) { - var ( - log = util.TestLogger(t) - - cfg = DefaultConfig - store = configstore.Mock{ - WatchFunc: func() <-chan configstore.WatchEvent { - return make(chan configstore.WatchEvent) - }, - } - - im mockConfigManager - - validate = func(*instance.Config) error { return nil } - owned = func(key string) (bool, error) { return true, nil } - ) - cfg.Enabled = true - cfg.ReshardInterval = time.Hour - - w, err := newConfigWatcher(log, cfg, &store, &im, owned, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - // First: return a "hello" config. - store.AllFunc = func(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - ch := make(chan instance.Config) - go func() { - ch <- instance.Config{Name: "hello"} - close(ch) - }() - return ch, nil - } - - err = w.refresh(context.Background()) - require.NoError(t, err) - - // Then: return a "new" config. 
- store.AllFunc = func(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - ch := make(chan instance.Config, 1) - go func() { - ch <- instance.Config{Name: "new"} - close(ch) - }() - return ch, nil - } - - err = w.refresh(context.Background()) - require.NoError(t, err) - - // "hello" and "new" should've been applied, and "hello" should've been deleted - // from the second refresh. - im.AssertCalled(t, "ApplyConfig", instance.Config{Name: "hello"}) - im.AssertCalled(t, "ApplyConfig", instance.Config{Name: "new"}) - im.AssertCalled(t, "DeleteConfig", "hello") -} - -func Test_configWatcher_handleEvent(t *testing.T) { - var ( - cfg = DefaultConfig - store = configstore.Mock{ - WatchFunc: func() <-chan configstore.WatchEvent { - return make(chan configstore.WatchEvent) - }, - } - - validate = func(*instance.Config) error { return nil } - - owned = func(key string) (bool, error) { return true, nil } - unowned = func(key string) (bool, error) { return false, nil } - ) - cfg.Enabled = true - - t.Run("new owned config", func(t *testing.T) { - var ( - log = util.TestLogger(t) - im mockConfigManager - ) - - w, err := newConfigWatcher(log, cfg, &store, &im, owned, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - err = w.handleEvent(configstore.WatchEvent{Key: "new", Config: &instance.Config{}}) - require.NoError(t, err) - - im.AssertNumberOfCalls(t, "ApplyConfig", 1) - }) - - t.Run("updated owned config", func(t *testing.T) { - var ( - log = util.TestLogger(t) - im mockConfigManager - ) - - w, err := newConfigWatcher(log, cfg, &store, &im, owned, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - // One for create, one for update - err = w.handleEvent(configstore.WatchEvent{Key: "update", Config: &instance.Config{}}) - require.NoError(t, err) - - err = w.handleEvent(configstore.WatchEvent{Key: "update", Config: &instance.Config{}}) - require.NoError(t, err) - - im.AssertNumberOfCalls(t, "ApplyConfig", 2) - }) - - t.Run("new unowned config", func(t *testing.T) { - var ( - log = util.TestLogger(t) - im mockConfigManager - ) - - w, err := newConfigWatcher(log, cfg, &store, &im, unowned, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - // One for create, one for update - err = w.handleEvent(configstore.WatchEvent{Key: "unowned", Config: &instance.Config{}}) - require.NoError(t, err) - - im.AssertNumberOfCalls(t, "ApplyConfig", 0) - }) - - t.Run("lost ownership", func(t *testing.T) { - var ( - log = util.TestLogger(t) - - im mockConfigManager - - isOwned = true - owns = func(key string) (bool, error) { return isOwned, nil } - ) - - w, err := newConfigWatcher(log, cfg, &store, &im, owns, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - // One for create, then one for ownership change - err = w.handleEvent(configstore.WatchEvent{Key: "disappear", Config: &instance.Config{}}) - require.NoError(t, err) - - // Mark the config as unowned. The re-apply should then delete it. 
- isOwned = false - - err = w.handleEvent(configstore.WatchEvent{Key: "disappear", Config: &instance.Config{}}) - require.NoError(t, err) - - im.AssertNumberOfCalls(t, "ApplyConfig", 1) - im.AssertNumberOfCalls(t, "DeleteConfig", 1) - }) - - t.Run("deleted running config", func(t *testing.T) { - var ( - log = util.TestLogger(t) - - im mockConfigManager - ) - - w, err := newConfigWatcher(log, cfg, &store, &im, owned, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - // One for create, then one for deleted. - err = w.handleEvent(configstore.WatchEvent{Key: "new-key", Config: &instance.Config{}}) - require.NoError(t, err) - - err = w.handleEvent(configstore.WatchEvent{Key: "new-key", Config: nil}) - require.NoError(t, err) - - im.AssertNumberOfCalls(t, "ApplyConfig", 1) - im.AssertNumberOfCalls(t, "DeleteConfig", 1) - }) -} - -func Test_configWatcher_nextReshard(t *testing.T) { - watcher := &configWatcher{ - log: util.TestLogger(t), - cfg: Config{ReshardInterval: time.Second}, - } - - t.Run("past time", func(t *testing.T) { - select { - case <-watcher.nextReshard(time.Time{}): - case <-time.After(250 * time.Millisecond): - require.FailNow(t, "nextReshard did not return an already ready channel") - } - }) - - t.Run("future time", func(t *testing.T) { - select { - case <-watcher.nextReshard(time.Now()): - case <-time.After(1500 * time.Millisecond): - require.FailNow(t, "nextReshard took too long to return") - } - }) -} - -type mockConfigManager struct { - mock.Mock -} - -func (m *mockConfigManager) GetInstance(name string) (instance.ManagedInstance, error) { - args := m.Mock.Called() - return args.Get(0).(instance.ManagedInstance), args.Error(1) -} - -func (m *mockConfigManager) ListInstances() map[string]instance.ManagedInstance { - args := m.Mock.Called() - return args.Get(0).(map[string]instance.ManagedInstance) -} - -// ListConfigs implements Manager. -func (m *mockConfigManager) ListConfigs() map[string]instance.Config { - args := m.Mock.Called() - return args.Get(0).(map[string]instance.Config) -} - -// ApplyConfig implements Manager. -func (m *mockConfigManager) ApplyConfig(c instance.Config) error { - args := m.Mock.Called(c) - return args.Error(0) -} - -// DeleteConfig implements Manager. -func (m *mockConfigManager) DeleteConfig(name string) error { - args := m.Mock.Called(name) - return args.Error(0) -} - -// Stop implements Manager. -func (m *mockConfigManager) Stop() { - m.Mock.Called() -} diff --git a/internal/static/metrics/cluster/configapi/types.go b/internal/static/metrics/cluster/configapi/types.go deleted file mode 100644 index bf16b72bdb..0000000000 --- a/internal/static/metrics/cluster/configapi/types.go +++ /dev/null @@ -1,73 +0,0 @@ -package configapi - -import ( - "encoding/json" - "fmt" - "net/http" -) - -// APIResponse is the base object returned for any API call. -// The Data field will be set to either nil or a value of -// another *Response type value from this package. -type APIResponse struct { - Status string `json:"status"` - Data interface{} `json:"data,omitempty"` -} - -// WriteTo writes the response to the given ResponseWriter with the provided -// statusCode. -func (r *APIResponse) WriteTo(w http.ResponseWriter, statusCode int) error { - bb, err := json.Marshal(r) - if err != nil { - // If we fail here, we should at least write a 500 back. 
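- // (Marshaling before calling WriteHeader is deliberate: once the header has
- // been written the status code can no longer be changed, so a marshal error
- // could not otherwise be reported as a 500.)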
- w.WriteHeader(http.StatusInternalServerError) - return err - } - - w.WriteHeader(statusCode) - n, err := w.Write(bb) - if err != nil { - return err - } else if n != len(bb) { - return fmt.Errorf("could not write full response. expected %d, wrote %d", len(bb), n) - } - - return nil -} - -// ErrorResponse is contained inside an APIResponse and returns -// an error string. Returned by any API call that can fail. -type ErrorResponse struct { - Error string `json:"error"` -} - -// ListConfigurationsResponse is contained inside an APIResponse -// and provides the list of configurations known to the KV store. -// Returned by ListConfigurations. -type ListConfigurationsResponse struct { - // Configs is the list of configuration names. - Configs []string `json:"configs"` -} - -// GetConfigurationResponse is contained inside an APIResponse -// and provides a single configuration known to the KV store. -// Returned by GetConfiguration. -type GetConfigurationResponse struct { - // Value is the stringified YAML configuration. - Value string `json:"value"` -} - -// WriteResponse writes a response object to the provided ResponseWriter w and with a -// status code of statusCode. resp is marshaled to JSON. -func WriteResponse(w http.ResponseWriter, statusCode int, resp interface{}) error { - apiResp := &APIResponse{Status: "success", Data: resp} - w.Header().Set("Content-Type", "application/json") - return apiResp.WriteTo(w, statusCode) -} - -// WriteError writes an error response back to the ResponseWriter. -func WriteError(w http.ResponseWriter, statusCode int, err error) error { - resp := &APIResponse{Status: "error", Data: &ErrorResponse{Error: err.Error()}} - w.Header().Set("Content-Type", "application/json") - return resp.WriteTo(w, statusCode) -} diff --git a/internal/static/metrics/cluster/node.go b/internal/static/metrics/cluster/node.go deleted file mode 100644 index fab9bc6b94..0000000000 --- a/internal/static/metrics/cluster/node.go +++ /dev/null @@ -1,381 +0,0 @@ -package cluster - -import ( - "context" - "fmt" - "hash/fnv" - "net/http" - "sync" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/gorilla/mux" - pb "github.com/grafana/agent/internal/static/agentproto" - "github.com/grafana/agent/internal/static/metrics/cluster/client" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/backoff" - "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" - "github.com/grafana/dskit/services" - "github.com/grafana/dskit/user" - "github.com/prometheus/client_golang/prometheus" -) - -const ( - // agentKey is the key used for storing the hash ring. - agentKey = "agent" -) - -var backoffConfig = backoff.Config{ - MinBackoff: time.Second, - MaxBackoff: 2 * time.Minute, - MaxRetries: 10, -} - -// node manages membership within a ring. when a node joins or leaves the ring, -// it will inform other nodes to reshard their workloads. After a node joins -// the ring, it will inform the local service to reshard. -type node struct { - log log.Logger - reg *util.Unregisterer - srv pb.ScrapingServiceServer - - mut sync.RWMutex - cfg Config - ring *ring.Ring - lc *ring.Lifecycler - - exited bool - reload chan struct{} -} - -// newNode creates a new node and registers it to the ring. 
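The removed configapi package wraps every payload in the same {status, data} envelope. As a rough sketch of how its helpers fit together (the handler shape and the Store interface here are hypothetical stand-ins, not part of the removed code):

    // Hypothetical handler built on the configapi helpers above.
    func listHandler(store Store) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
            keys, err := store.List(r.Context()) // assumed store with a List method
            if err != nil {
                // Writes {"status":"error","data":{"error":"..."}} with the given code.
                _ = configapi.WriteError(w, http.StatusInternalServerError, err)
                return
            }
            // Writes {"status":"success","data":{"configs":[...]}} with a 200.
            _ = configapi.WriteResponse(w, http.StatusOK,
                &configapi.ListConfigurationsResponse{Configs: keys})
        }
    }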
-func newNode(reg prometheus.Registerer, log log.Logger, cfg Config, s pb.ScrapingServiceServer) (*node, error) { - n := &node{ - reg: util.WrapWithUnregisterer(reg), - srv: s, - log: log, - - reload: make(chan struct{}, 1), - } - if err := n.ApplyConfig(cfg); err != nil { - return nil, err - } - go n.run() - return n, nil -} - -func (n *node) ApplyConfig(cfg Config) error { - n.mut.Lock() - defer n.mut.Unlock() - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) - defer cancel() - - // Detect if the config changed. - if util.CompareYAML(n.cfg, cfg) { - return nil - } - - if n.exited { - return fmt.Errorf("node already exited") - } - - level.Info(n.log).Log("msg", "applying config") - - // Shut down old components before re-creating the updated ones. - n.reg.UnregisterAll() - - if n.lc != nil { - // Note that this will call performClusterReshard and will block until it - // completes. - err := services.StopAndAwaitTerminated(ctx, n.lc) - if err != nil { - return fmt.Errorf("failed to stop lifecycler: %w", err) - } - n.lc = nil - } - - if n.ring != nil { - err := services.StopAndAwaitTerminated(ctx, n.ring) - if err != nil { - return fmt.Errorf("failed to stop ring: %w", err) - } - n.ring = nil - } - - if !cfg.Enabled { - n.cfg = cfg - return nil - } - - r, err := newRing(cfg.Lifecycler.RingConfig, "agent_viewer", agentKey, n.reg, n.log) - if err != nil { - return fmt.Errorf("failed to create ring: %w", err) - } - - if err := services.StartAndAwaitRunning(context.Background(), r); err != nil { - return fmt.Errorf("failed to start ring: %w", err) - } - n.ring = r - - lc, err := ring.NewLifecycler(cfg.Lifecycler.LifecyclerConfig, n, "agent", agentKey, false, n.log, prometheus.WrapRegistererWithPrefix("agent_dskit_", n.reg)) - if err != nil { - return fmt.Errorf("failed to create lifecycler: %w", err) - } - if err := services.StartAndAwaitRunning(context.Background(), lc); err != nil { - if err := services.StopAndAwaitTerminated(ctx, r); err != nil { - level.Error(n.log).Log("msg", "failed to stop ring when returning error. next config reload will fail", "err", err) - } - return fmt.Errorf("failed to start lifecycler: %w", err) - } - n.lc = lc - - n.cfg = cfg - - // Reload and reshard the cluster. - n.reload <- struct{}{} - return nil -} - -// newRing creates a new Cortex Ring that ignores unhealthy nodes. -func newRing(cfg ring.Config, name, key string, reg prometheus.Registerer, log log.Logger) (*ring.Ring, error) { - codec := ring.GetCodec() - store, err := kv.NewClient( - cfg.KVStore, - codec, - kv.RegistererWithKVName(reg, name+"-ring"), - log, - ) - if err != nil { - return nil, err - } - return ring.NewWithStoreClientAndStrategy(cfg, name, key, store, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("agent_dskit_", reg), log) -} - -// run waits for connection to the ring and kickstarts the join process. -func (n *node) run() { - for range n.reload { - n.mut.RLock() - - if err := n.performClusterReshard(context.Background(), true); err != nil { - level.Warn(n.log).Log("msg", "dynamic cluster reshard did not succeed", "err", err) - } - - n.mut.RUnlock() - } - - level.Info(n.log).Log("msg", "node run loop exiting") -} - -// performClusterReshard informs the cluster to immediately trigger a reshard -// of their workloads. if joining is true, the server provided to newNode will -// also be informed. 
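performClusterReshard below retries both the ring read and the per-node reshard RPC with dskit's backoff helper, using the package-level backoffConfig. Stripped of details, the retry loop has this shape (op is a hypothetical stand-in for the real call):

    // Generic shape of the dskit backoff loop used below; op is hypothetical.
    func withRetries(ctx context.Context, op func(context.Context) error) error {
        bo := backoff.New(ctx, backoff.Config{
            MinBackoff: time.Second,
            MaxBackoff: 2 * time.Minute,
            MaxRetries: 10,
        })
        var err error
        for bo.Ongoing() {
            if err = op(ctx); err == nil {
                return nil
            }
            bo.Wait() // sleep before the next attempt
        }
        return bo.Err() // context cancellation or retries exhausted
    }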
-func (n *node) performClusterReshard(ctx context.Context, joining bool) error {
-	if n.ring == nil || n.lc == nil {
-		level.Info(n.log).Log("msg", "node disabled, not resharding")
-		return nil
-	}
-
-	if n.cfg.ClusterReshardEventTimeout > 0 {
-		var cancel context.CancelFunc
-		ctx, cancel = context.WithTimeout(ctx, n.cfg.ClusterReshardEventTimeout)
-		defer cancel()
-	}
-
-	var (
-		rs  ring.ReplicationSet
-		err error
-	)
-
-	backoff := backoff.New(ctx, backoffConfig)
-	for backoff.Ongoing() {
-		if ctx.Err() != nil {
-			return ctx.Err()
-		}
-		rs, err = n.ring.GetAllHealthy(ring.Read)
-		if err == nil {
-			break
-		}
-		backoff.Wait()
-	}
-
-	if len(rs.Instances) > 0 {
-		level.Info(n.log).Log("msg", "informing remote nodes to reshard")
-	}
-
-	// These are not in the go routine below due to potential race condition with n.lc.addr
-	_, err = rs.Do(ctx, 500*time.Millisecond, func(c context.Context, id *ring.InstanceDesc) (interface{}, error) {
-		// Skip over ourselves.
-		if id.Addr == n.lc.Addr {
-			return nil, nil
-		}
-
-		notifyCtx := user.InjectOrgID(c, "fake")
-		return nil, n.notifyReshard(notifyCtx, id)
-	})
-
-	if err != nil {
-		level.Error(n.log).Log("msg", "notifying other nodes failed", "err", err)
-	}
-
-	if joining {
-		level.Info(n.log).Log("msg", "running local reshard")
-		if _, err := n.srv.Reshard(ctx, &pb.ReshardRequest{}); err != nil {
-			level.Warn(n.log).Log("msg", "dynamic local reshard did not succeed", "err", err)
-		}
-	}
-	return err
-}
-
-// notifyReshard informs an individual node to reshard.
-func (n *node) notifyReshard(ctx context.Context, id *ring.InstanceDesc) error {
-	cli, err := client.New(n.cfg.Client, id.Addr)
-	if err != nil {
-		return err
-	}
-	defer cli.Close()
-
-	level.Info(n.log).Log("msg", "attempting to notify remote agent to reshard", "addr", id.Addr)
-
-	backoff := backoff.New(ctx, backoffConfig)
-	for backoff.Ongoing() {
-		if ctx.Err() != nil {
-			return ctx.Err()
-		}
-		_, err := cli.Reshard(ctx, &pb.ReshardRequest{})
-		if err == nil {
-			break
-		}
-
-		level.Warn(n.log).Log("msg", "reshard notification attempt failed", "addr", id.Addr, "err", err, "attempt", backoff.NumRetries())
-		backoff.Wait()
-	}
-
-	return backoff.Err()
-}
-
-// WaitJoined waits for the node to join the cluster and enter the
-// ACTIVE state.
-func (n *node) WaitJoined(ctx context.Context) error {
-	n.mut.RLock()
-	defer n.mut.RUnlock()
-
-	level.Info(n.log).Log("msg", "waiting for the node to join the cluster")
-	defer level.Info(n.log).Log("msg", "node has joined the cluster")
-
-	if n.ring == nil || n.lc == nil {
-		return fmt.Errorf("node disabled")
-	}
-
-	return waitJoined(ctx, agentKey, n.ring.KVClient, n.lc.ID)
-}
-
-func waitJoined(ctx context.Context, key string, kvClient kv.Client, id string) error {
-	kvClient.WatchKey(ctx, key, func(value interface{}) bool {
-		if value == nil {
-			return true
-		}
-
-		desc := value.(*ring.Desc)
-		for ingID, ing := range desc.Ingesters {
-			if ingID == id && ing.State == ring.ACTIVE {
-				return false
-			}
-		}
-
-		return true
-	})
-
-	return ctx.Err()
-}
-
-func (n *node) WireAPI(r *mux.Router) {
-	r.HandleFunc("/debug/ring", func(rw http.ResponseWriter, r *http.Request) {
-		n.mut.RLock()
-		defer n.mut.RUnlock()
-
-		if n.ring == nil {
-			http.NotFoundHandler().ServeHTTP(rw, r)
-			return
-		}
-
-		n.ring.ServeHTTP(rw, r)
-	})
-}
-
-// Stop stops the node and cancels it from running. The node cannot be used
-// again once Stop is called.
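A subtlety in waitJoined above: kv.Client.WatchKey invokes the callback on every change to the key, and the watch only ends when the callback returns false (or the context is canceled). Condensed, with a hypothetical readiness predicate:

    // WatchKey idiom from waitJoined; active is a hypothetical predicate.
    func waitActive(ctx context.Context, kvClient kv.Client, active func(*ring.Desc) bool) error {
        kvClient.WatchKey(ctx, agentKey, func(value interface{}) bool {
            desc, ok := value.(*ring.Desc)
            if !ok || desc == nil {
                return true // nothing to inspect yet; keep watching
            }
            return !active(desc) // returning false ends the watch
        })
        return ctx.Err()
    }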
-func (n *node) Stop() error { - n.mut.Lock() - defer n.mut.Unlock() - - if n.exited { - return fmt.Errorf("node already exited") - } - n.exited = true - - level.Info(n.log).Log("msg", "shutting down node") - - // Shut down dependencies. The lifecycler *MUST* be shut down first since n.ring is - // used during the shutdown process to inform other nodes to reshard. - // - // Note that stopping the lifecycler will call performClusterReshard and will block - // until it completes. - var ( - firstError error - deps []services.Service - ) - - if n.lc != nil { - deps = append(deps, n.lc) - } - if n.ring != nil { - deps = append(deps, n.ring) - } - for _, dep := range deps { - err := services.StopAndAwaitTerminated(context.Background(), dep) - if err != nil && firstError == nil { - firstError = err - } - } - - close(n.reload) - level.Info(n.log).Log("msg", "node shut down") - return firstError -} - -// Flush implements ring.FlushTransferer. It's a no-op. -func (n *node) Flush() {} - -// TransferOut implements ring.FlushTransferer. It connects to all other healthy agents and -// tells them to reshard. TransferOut should NOT be called manually unless the mutex is -// held. -func (n *node) TransferOut(ctx context.Context) error { - return n.performClusterReshard(ctx, false) -} - -// Owns checks to see if a key is owned by this node. owns will return -// an error if the ring is empty or if there aren't enough healthy nodes. -func (n *node) Owns(key string) (bool, error) { - n.mut.RLock() - defer n.mut.RUnlock() - - rs, err := n.ring.Get(keyHash(key), ring.Write, nil, nil, nil) - if err != nil { - return false, err - } - for _, r := range rs.Instances { - if r.Addr == n.lc.Addr { - return true, nil - } - } - return false, nil -} - -func keyHash(key string) uint32 { - h := fnv.New32() - _, _ = h.Write([]byte(key)) - return h.Sum32() -} diff --git a/internal/static/metrics/cluster/node_test.go b/internal/static/metrics/cluster/node_test.go deleted file mode 100644 index 15df4ab7fb..0000000000 --- a/internal/static/metrics/cluster/node_test.go +++ /dev/null @@ -1,223 +0,0 @@ -package cluster - -import ( - "context" - "flag" - "fmt" - "math/rand" - "net" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/golang/protobuf/ptypes/empty" - "github.com/grafana/agent/internal/static/agentproto" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/ring" - "github.com/grafana/dskit/services" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" - "google.golang.org/grpc" - "gopkg.in/yaml.v2" -) - -func Test_node_Join(t *testing.T) { - var ( - reg = prometheus.NewRegistry() - logger = util.TestLogger(t) - - localReshard = make(chan struct{}, 2) - remoteReshard = make(chan struct{}, 2) - ) - - local := &agentproto.FuncScrapingServiceServer{ - ReshardFunc: func(c context.Context, rr *agentproto.ReshardRequest) (*empty.Empty, error) { - localReshard <- struct{}{} - return &empty.Empty{}, nil - }, - } - - remote := &agentproto.FuncScrapingServiceServer{ - ReshardFunc: func(c context.Context, rr *agentproto.ReshardRequest) (*empty.Empty, error) { - remoteReshard <- struct{}{} - return &empty.Empty{}, nil - }, - } - startNode(t, remote, logger) - - nodeConfig := DefaultConfig - nodeConfig.Enabled = true - nodeConfig.Lifecycler.LifecyclerConfig = testLifecyclerConfig(t) - - n, err := newNode(reg, logger, nodeConfig, local) - require.NoError(t, err) - t.Cleanup(func() { _ = n.Stop() }) - - 
require.NoError(t, n.WaitJoined(context.Background())) - - waitAll(t, remoteReshard, localReshard) -} - -// waitAll waits for a message on all channels. -func waitAll(t *testing.T, chs ...chan struct{}) { - timeoutCh := time.After(5 * time.Second) - for _, ch := range chs { - select { - case <-timeoutCh: - require.FailNow(t, "timeout exceeded") - case <-ch: - } - } -} - -func Test_node_Leave(t *testing.T) { - var ( - reg = prometheus.NewRegistry() - logger = util.TestLogger(t) - - sendReshard = atomic.NewBool(false) - remoteReshard = make(chan struct{}, 2) - ) - - local := &agentproto.FuncScrapingServiceServer{ - ReshardFunc: func(c context.Context, rr *agentproto.ReshardRequest) (*empty.Empty, error) { - return &empty.Empty{}, nil - }, - } - - remote := &agentproto.FuncScrapingServiceServer{ - ReshardFunc: func(c context.Context, rr *agentproto.ReshardRequest) (*empty.Empty, error) { - if sendReshard.Load() { - remoteReshard <- struct{}{} - } - return &empty.Empty{}, nil - }, - } - startNode(t, remote, logger) - - nodeConfig := DefaultConfig - nodeConfig.Enabled = true - nodeConfig.Lifecycler.LifecyclerConfig = testLifecyclerConfig(t) - - n, err := newNode(reg, logger, nodeConfig, local) - require.NoError(t, err) - require.NoError(t, n.WaitJoined(context.Background())) - - // Update the reshard function to write to remoteReshard on shutdown. - sendReshard.Store(true) - - // Stop the node so it transfers data outward. - require.NoError(t, n.Stop(), "failed to stop the node") - - level.Info(logger).Log("msg", "waiting for remote reshard to occur") - waitAll(t, remoteReshard) -} - -func Test_node_ApplyConfig(t *testing.T) { - var ( - reg = prometheus.NewRegistry() - logger = util.TestLogger(t) - - localReshard = make(chan struct{}, 10) - ) - - local := &agentproto.FuncScrapingServiceServer{ - ReshardFunc: func(c context.Context, rr *agentproto.ReshardRequest) (*empty.Empty, error) { - localReshard <- struct{}{} - return &empty.Empty{}, nil - }, - } - - nodeConfig := DefaultConfig - nodeConfig.Enabled = true - nodeConfig.Lifecycler.LifecyclerConfig = testLifecyclerConfig(t) - - n, err := newNode(reg, logger, nodeConfig, local) - require.NoError(t, err) - t.Cleanup(func() { _ = n.Stop() }) - require.NoError(t, n.WaitJoined(context.Background())) - - // Wait for the initial join to trigger. - waitAll(t, localReshard) - - // An ApplyConfig working correctly should re-join the cluster, which can be - // detected by local resharding applying twice. - nodeConfig.Lifecycler.NumTokens = 1 - require.NoError(t, n.ApplyConfig(nodeConfig), "failed to apply new config") - require.NoError(t, n.WaitJoined(context.Background())) - - waitAll(t, localReshard) -} - -// startNode launches srv as a gRPC server and registers it to the ring. -func startNode(t *testing.T, srv agentproto.ScrapingServiceServer, logger log.Logger) { - t.Helper() - - l, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - - grpcServer := grpc.NewServer() - agentproto.RegisterScrapingServiceServer(grpcServer, srv) - - go func() { - _ = grpcServer.Serve(l) - }() - t.Cleanup(func() { grpcServer.Stop() }) - - lcConfig := testLifecyclerConfig(t) - lcConfig.Addr = l.Addr().(*net.TCPAddr).IP.String() - lcConfig.Port = l.Addr().(*net.TCPAddr).Port - - lc, err := ring.NewLifecycler(lcConfig, ring.NewNoopFlushTransferer(), "agent", "agent", false, logger, nil) - require.NoError(t, err) - - err = services.StartAndAwaitRunning(context.Background(), lc) - require.NoError(t, err) - - // Wait for the new node to be in the ring. 
- joinWaitCtx, joinWaitCancel := context.WithTimeout(context.Background(), 5*time.Second) - defer joinWaitCancel() - err = waitJoined(joinWaitCtx, agentKey, lc.KVStore, lc.ID) - require.NoError(t, err) - - t.Cleanup(func() { - _ = services.StopAndAwaitTerminated(context.Background(), lc) - }) -} - -func testLifecyclerConfig(t *testing.T) ring.LifecyclerConfig { - t.Helper() - - cfgText := util.Untab(fmt.Sprintf(` -ring: - kvstore: - store: inmemory - prefix: tests/%s -final_sleep: 0s -min_ready_duration: 0s - `, t.Name())) - - // Apply default values by registering to a fake flag set. - var lc ring.LifecyclerConfig - lc.RegisterFlagsWithPrefix("", flag.NewFlagSet("", flag.ContinueOnError), log.NewNopLogger()) - - err := yaml.Unmarshal([]byte(cfgText), &lc) - require.NoError(t, err) - - // Assign a random default ID. - var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") - name := make([]rune, 10) - for i := range name { - name[i] = letters[rand.Intn(len(letters))] - } - lc.ID = string(name) - - // Add an invalid default address/port. Tests can override if they expect - // incoming traffic. - lc.Addr = "x.x.x.x" - lc.Port = -1 - - return lc -} diff --git a/internal/static/metrics/cluster/validation.go b/internal/static/metrics/cluster/validation.go deleted file mode 100644 index 6821a8beeb..0000000000 --- a/internal/static/metrics/cluster/validation.go +++ /dev/null @@ -1,150 +0,0 @@ -package cluster - -import ( - "fmt" - - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/loki/clients/pkg/promtail/discovery/consulagent" - "github.com/prometheus/common/config" - "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/aws" - "github.com/prometheus/prometheus/discovery/azure" - "github.com/prometheus/prometheus/discovery/consul" - "github.com/prometheus/prometheus/discovery/digitalocean" - "github.com/prometheus/prometheus/discovery/dns" - "github.com/prometheus/prometheus/discovery/eureka" - "github.com/prometheus/prometheus/discovery/file" - "github.com/prometheus/prometheus/discovery/gce" - "github.com/prometheus/prometheus/discovery/hetzner" - "github.com/prometheus/prometheus/discovery/http" - "github.com/prometheus/prometheus/discovery/kubernetes" - "github.com/prometheus/prometheus/discovery/linode" - "github.com/prometheus/prometheus/discovery/marathon" - "github.com/prometheus/prometheus/discovery/moby" - "github.com/prometheus/prometheus/discovery/openstack" - "github.com/prometheus/prometheus/discovery/scaleway" - "github.com/prometheus/prometheus/discovery/triton" - "github.com/prometheus/prometheus/discovery/zookeeper" -) - -func validateNofiles(c *instance.Config) error { - for i, rw := range c.RemoteWrite { - if err := validateHTTPNoFiles(&rw.HTTPClientConfig); err != nil { - return fmt.Errorf("failed to validate remote_write at index %d: %w", i, err) - } - } - - for i, sc := range c.ScrapeConfigs { - if err := validateHTTPNoFiles(&sc.HTTPClientConfig); err != nil { - return fmt.Errorf("failed to validate scrape_config at index %d: %w", i, err) - } - - for j, disc := range sc.ServiceDiscoveryConfigs { - if err := validateDiscoveryNoFiles(disc); err != nil { - return fmt.Errorf("failed to validate service discovery at index %d within scrape_config at index %d: %w", j, i, err) - } - } - } - - return nil -} - -func validateHTTPNoFiles(cfg *config.HTTPClientConfig) error { - checks := []struct { - name string - check func() bool - }{ - {"bearer_token_file", func() bool { return 
cfg.BearerTokenFile != "" }}, - {"password_file", func() bool { return cfg.BasicAuth != nil && cfg.BasicAuth.PasswordFile != "" }}, - {"credentials_file", func() bool { return cfg.Authorization != nil && cfg.Authorization.CredentialsFile != "" }}, - {"ca_file", func() bool { return cfg.TLSConfig.CAFile != "" }}, - {"cert_file", func() bool { return cfg.TLSConfig.CertFile != "" }}, - {"key_file", func() bool { return cfg.TLSConfig.KeyFile != "" }}, - } - for _, check := range checks { - if check.check() { - return fmt.Errorf("%s must be empty unless dangerous_allow_reading_files is set", check.name) - } - } - return nil -} - -func validateDiscoveryNoFiles(disc discovery.Config) error { - switch d := disc.(type) { - case discovery.StaticConfig: - // no-op - case *azure.SDConfig: - // no-op - case *consul.SDConfig: - if err := validateHTTPNoFiles(&config.HTTPClientConfig{TLSConfig: d.HTTPClientConfig.TLSConfig}); err != nil { - return err - } - case *consulagent.SDConfig: - if err := validateHTTPNoFiles(&config.HTTPClientConfig{TLSConfig: d.TLSConfig}); err != nil { - return err - } - case *digitalocean.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *dns.SDConfig: - // no-op - case *moby.DockerSwarmSDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *aws.EC2SDConfig: - // no-op - case *eureka.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *file.SDConfig: - // no-op - case *gce.SDConfig: - // no-op - case *hetzner.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *kubernetes.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *marathon.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - if d.AuthTokenFile != "" { - return fmt.Errorf("auth_token_file must be empty unless dangerous_allow_reading_files is set") - } - case *openstack.SDConfig: - if err := validateHTTPNoFiles(&config.HTTPClientConfig{TLSConfig: d.TLSConfig}); err != nil { - return err - } - case *scaleway.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *triton.SDConfig: - if err := validateHTTPNoFiles(&config.HTTPClientConfig{TLSConfig: d.TLSConfig}); err != nil { - return err - } - case *http.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *linode.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *zookeeper.NerveSDConfig: - // no-op - case *zookeeper.ServersetSDConfig: - // no-op - default: - return fmt.Errorf("unknown service discovery %s; rejecting config for safety. 
set dangerous_allow_reading_files to ignore", d.Name()) - } - - return nil -} diff --git a/internal/static/metrics/cluster/validation_test.go b/internal/static/metrics/cluster/validation_test.go deleted file mode 100644 index 180a0bfabb..0000000000 --- a/internal/static/metrics/cluster/validation_test.go +++ /dev/null @@ -1,118 +0,0 @@ -package cluster - -import ( - "fmt" - "strings" - "testing" - - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/stretchr/testify/require" -) - -func Test_validateNoFiles(t *testing.T) { - tt := []struct { - name string - input string - expect error - }{ - { - name: "valid config", - input: util.Untab(` - scrape_configs: - - job_name: innocent_scrape - static_configs: - - targets: ['127.0.0.1:12345'] - remote_write: - - url: http://localhost:9009/api/prom/push - `), - expect: nil, - }, - { - name: "all SDs", - input: util.Untab(` - scrape_configs: - - job_name: basic_sds - static_configs: - - targets: ['localhost'] - azure_sd_configs: - - subscription_id: fake - tenant_id: fake - client_id: fake - client_secret: fake - consul_sd_configs: - - {} - dns_sd_configs: - - names: ['fake'] - ec2_sd_configs: - - region: fake - eureka_sd_configs: - - server: http://localhost:80/eureka - file_sd_configs: - - files: ['fake.json'] - digitalocean_sd_configs: - - {} - dockerswarm_sd_configs: - - host: localhost - role: nodes - gce_sd_configs: - - project: fake - zone: fake - hetzner_sd_configs: - - role: hcloud - kubernetes_sd_configs: - - role: pod - marathon_sd_configs: - - servers: ['localhost'] - nerve_sd_configs: - - servers: ['localhost'] - paths: ['/'] - openstack_sd_configs: - - role: instance - region: fake - scaleway_sd_configs: - - role: instance - project_id: ffffffff-ffff-ffff-ffff-ffffffffffff - secret_key: ffffffff-ffff-ffff-ffff-ffffffffffff - access_key: SCWXXXXXXXXXXXXXXXXX - serverset_sd_configs: - - servers: ['localhost'] - paths: ['/'] - triton_sd_configs: - - account: fake - dns_suffix: fake - endpoint: fake - `), - expect: nil, - }, - { - name: "invalid http client config", - input: util.Untab(` - scrape_configs: - - job_name: malicious_scrape - static_configs: - - targets: ['badsite.com'] - basic_auth: - username: file_leak - password_file: /etc/password - remote_write: - - url: http://localhost:9009/api/prom/push - `), - expect: fmt.Errorf("failed to validate scrape_config at index 0: password_file must be empty unless dangerous_allow_reading_files is set"), - }, - } - - for _, tc := range tt { - t.Run(tc.name, func(t *testing.T) { - cfg, err := instance.UnmarshalConfig(strings.NewReader(tc.input)) - require.NoError(t, err) - - actual := validateNofiles(cfg) - if tc.expect == nil { - require.NoError(t, actual) - } else { - require.EqualError(t, actual, tc.expect.Error()) - } - }) - } -} diff --git a/internal/static/metrics/http.go b/internal/static/metrics/http.go deleted file mode 100644 index 51e5aa64e6..0000000000 --- a/internal/static/metrics/http.go +++ /dev/null @@ -1,166 +0,0 @@ -package metrics - -import ( - "fmt" - "net/http" - "net/url" - "sort" - "time" - - "github.com/go-kit/log/level" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/static/metrics/cluster/configapi" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/scrape" -) - -// WireAPI adds API routes to the provided mux router. 
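The handlers wired up just below all reply with the configapi envelope shown earlier. A client reading the instances list could decode it along these lines (the base URL and function name are placeholders):

    // Hypothetical client-side read of the instances endpoint.
    func listInstances(base string) ([]string, error) {
        resp, err := http.Get(base + "/agent/api/v1/metrics/instances")
        if err != nil {
            return nil, err
        }
        defer resp.Body.Close()
        var body struct {
            Status string   `json:"status"`
            Data   []string `json:"data"` // sorted instance names
        }
        if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
            return nil, err
        }
        return body.Data, nil
    }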
-func (a *Agent) WireAPI(r *mux.Router) {
-	a.cluster.WireAPI(r)
-
-	// Backwards compatible endpoints. Use endpoints with `metrics` prefix instead.
-	r.HandleFunc("/agent/api/v1/instances", a.ListInstancesHandler).Methods("GET")
-	r.HandleFunc("/agent/api/v1/targets", a.ListTargetsHandler).Methods("GET")
-
-	r.HandleFunc("/agent/api/v1/metrics/instances", a.ListInstancesHandler).Methods("GET")
-	r.HandleFunc("/agent/api/v1/metrics/targets", a.ListTargetsHandler).Methods("GET")
-	r.HandleFunc("/agent/api/v1/metrics/instance/{instance}/write", a.PushMetricsHandler).Methods("POST")
-}
-
-// ListInstancesHandler writes the set of currently running instances to the http.ResponseWriter.
-func (a *Agent) ListInstancesHandler(w http.ResponseWriter, _ *http.Request) {
-	cfgs := a.mm.ListConfigs()
-	instanceNames := make([]string, 0, len(cfgs))
-	for k := range cfgs {
-		instanceNames = append(instanceNames, k)
-	}
-	sort.Strings(instanceNames)
-
-	err := configapi.WriteResponse(w, http.StatusOK, instanceNames)
-	if err != nil {
-		level.Error(a.logger).Log("msg", "failed to write response", "err", err)
-	}
-}
-
-// ListTargetsHandler retrieves the full set of targets across all instances and shows
-// information on them.
-func (a *Agent) ListTargetsHandler(w http.ResponseWriter, r *http.Request) {
-	instances := a.mm.ListInstances()
-	allTargets := make(map[string]TargetSet, len(instances))
-	for instName, inst := range instances {
-		allTargets[instName] = inst.TargetsActive()
-	}
-	ListTargetsHandler(allTargets).ServeHTTP(w, r)
-}
-
-// ListTargetsHandler renders a mapping of instance to target set.
-func ListTargetsHandler(targets map[string]TargetSet) http.Handler {
-	return http.HandlerFunc(func(rw http.ResponseWriter, _ *http.Request) {
-		resp := ListTargetsResponse{}
-
-		for instance, tset := range targets {
-			for key, targets := range tset {
-				for _, tgt := range targets {
-					var lastError string
-					if scrapeError := tgt.LastError(); scrapeError != nil {
-						lastError = scrapeError.Error()
-					}
-
-					resp = append(resp, TargetInfo{
-						InstanceName: instance,
-						TargetGroup:  key,
-
-						Endpoint:         tgt.URL().String(),
-						State:            string(tgt.Health()),
-						DiscoveredLabels: tgt.DiscoveredLabels(),
-						Labels:           tgt.Labels(),
-						LastScrape:       tgt.LastScrape(),
-						ScrapeDuration:   tgt.LastScrapeDuration().Milliseconds(),
-						ScrapeError:      lastError,
-					})
-				}
-			}
-		}
-
-		sort.Slice(resp, func(i, j int) bool {
-			// Sort by instance, then target group, then job label, then instance label.
-			var (
-				iInstance      = resp[i].InstanceName
-				iTargetGroup   = resp[i].TargetGroup
-				iJobLabel      = resp[i].Labels.Get(model.JobLabel)
-				iInstanceLabel = resp[i].Labels.Get(model.InstanceLabel)
-
-				jInstance      = resp[j].InstanceName
-				jTargetGroup   = resp[j].TargetGroup
-				jJobLabel      = resp[j].Labels.Get(model.JobLabel)
-				jInstanceLabel = resp[j].Labels.Get(model.InstanceLabel)
-			)
-
-			switch {
-			case iInstance != jInstance:
-				return iInstance < jInstance
-			case iTargetGroup != jTargetGroup:
-				return iTargetGroup < jTargetGroup
-			case iJobLabel != jJobLabel:
-				return iJobLabel < jJobLabel
-			default:
-				return iInstanceLabel < jInstanceLabel
-			}
-		})
-
-		_ = configapi.WriteResponse(rw, http.StatusOK, resp)
-	})
-}
-
-// TargetSet is a set of targets for an individual scraper.
-type TargetSet map[string][]*scrape.Target
-
-// ListTargetsResponse is returned by the ListTargetsHandler.
-type ListTargetsResponse []TargetInfo
-
-// TargetInfo describes a specific target.
-type TargetInfo struct { - InstanceName string `json:"instance"` - TargetGroup string `json:"target_group"` - - Endpoint string `json:"endpoint"` - State string `json:"state"` - Labels labels.Labels `json:"labels"` - DiscoveredLabels labels.Labels `json:"discovered_labels"` - LastScrape time.Time `json:"last_scrape"` - ScrapeDuration int64 `json:"scrape_duration_ms"` - ScrapeError string `json:"scrape_error"` -} - -// PushMetricsHandler provides a way to POST data directly into -// an instance's WAL. -func (a *Agent) PushMetricsHandler(w http.ResponseWriter, r *http.Request) { - // Get instance name. - instanceName, err := getInstanceName(r) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - // Get the metrics instance and serve the request. - managedInstance, err := a.InstanceManager().GetInstance(instanceName) - if err != nil || managedInstance == nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - managedInstance.WriteHandler().ServeHTTP(w, r) -} - -// getInstanceName uses gorilla/mux's route variables to extract the -// "instance" variable. If not found, getInstanceName will return an error. -func getInstanceName(r *http.Request) (string, error) { - vars := mux.Vars(r) - name := vars["instance"] - name, err := url.PathUnescape(name) - if err != nil { - return "", fmt.Errorf("could not decode instance name: %w", err) - } - return name, nil -} diff --git a/internal/static/metrics/http_test.go b/internal/static/metrics/http_test.go deleted file mode 100644 index 7f557a5a40..0000000000 --- a/internal/static/metrics/http_test.go +++ /dev/null @@ -1,142 +0,0 @@ -package metrics - -import ( - "fmt" - "net/http" - "net/http/httptest" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/scrape" - "github.com/stretchr/testify/require" -) - -func TestAgent_ListInstancesHandler(t *testing.T) { - fact := newFakeInstanceFactory() - a, err := newAgent(prometheus.NewRegistry(), Config{ - WALDir: "/tmp/agent", - }, log.NewNopLogger(), fact.factory) - require.NoError(t, err) - defer a.Stop() - - r := httptest.NewRequest("GET", "/agent/api/v1/metrics/instances", nil) - - t.Run("no instances", func(t *testing.T) { - rr := httptest.NewRecorder() - a.ListInstancesHandler(rr, r) - expect := `{"status":"success","data":[]}` - require.Equal(t, expect, rr.Body.String()) - }) - - t.Run("non-empty", func(t *testing.T) { - require.NoError(t, a.mm.ApplyConfig(makeInstanceConfig("foo"))) - require.NoError(t, a.mm.ApplyConfig(makeInstanceConfig("bar"))) - - expect := `{"status":"success","data":["bar","foo"]}` - util.Eventually(t, func(t require.TestingT) { - rr := httptest.NewRecorder() - a.ListInstancesHandler(rr, r) - require.Equal(t, expect, rr.Body.String()) - }) - }) -} - -func TestAgent_ListTargetsHandler(t *testing.T) { - fact := newFakeInstanceFactory() - a, err := newAgent(prometheus.NewRegistry(), Config{ - WALDir: "/tmp/agent", - }, log.NewNopLogger(), fact.factory) - require.NoError(t, err) - - mockManager := &instance.MockManager{ - ListInstancesFunc: func() map[string]instance.ManagedInstance { return nil }, - ListConfigsFunc: func() map[string]instance.Config { return nil }, - ApplyConfigFunc: func(_ instance.Config) error { return nil }, - DeleteConfigFunc: func(name 
string) error { return nil },
-		StopFunc:         func() {},
-	}
-	a.mm, err = instance.NewModalManager(prometheus.NewRegistry(), a.logger, mockManager, instance.ModeDistinct)
-	require.NoError(t, err)
-
-	r := httptest.NewRequest("GET", "/agent/api/v1/metrics/targets", nil)
-
-	t.Run("scrape manager not ready", func(t *testing.T) {
-		mockManager.ListInstancesFunc = func() map[string]instance.ManagedInstance {
-			return map[string]instance.ManagedInstance{
-				"test_instance": &mockInstanceScrape{},
-			}
-		}
-
-		rr := httptest.NewRecorder()
-		a.ListTargetsHandler(rr, r)
-		expect := `{"status": "success", "data": []}`
-		require.JSONEq(t, expect, rr.Body.String())
-		require.Equal(t, http.StatusOK, rr.Result().StatusCode)
-	})
-
-	t.Run("scrape manager targets", func(t *testing.T) {
-		tgt := scrape.NewTarget(labels.FromMap(map[string]string{
-			model.JobLabel:         "job",
-			model.InstanceLabel:    "instance",
-			"foo":                  "bar",
-			model.SchemeLabel:      "http",
-			model.AddressLabel:     "localhost:12345",
-			model.MetricsPathLabel: "/metrics",
-		}), labels.FromMap(map[string]string{
-			"__discovered__": "yes",
-		}), nil)
-
-		startTime := time.Date(1994, time.January, 12, 0, 0, 0, 0, time.UTC)
-		tgt.Report(startTime, time.Minute, fmt.Errorf("something went wrong"))
-
-		mockManager.ListInstancesFunc = func() map[string]instance.ManagedInstance {
-			return map[string]instance.ManagedInstance{
-				"test_instance": &mockInstanceScrape{
-					tgts: map[string][]*scrape.Target{
-						"group_a": {tgt},
-					},
-				},
-			}
-		}
-
-		rr := httptest.NewRecorder()
-		a.ListTargetsHandler(rr, r)
-		expect := `{
-			"status": "success",
-			"data": [{
-				"instance": "test_instance",
-				"target_group": "group_a",
-				"endpoint": "http://localhost:12345/metrics",
-				"state": "down",
-				"labels": {
-					"foo": "bar",
-					"instance": "instance",
-					"job": "job"
-				},
-				"discovered_labels": {
-					"__discovered__": "yes"
-				},
-				"last_scrape": "1994-01-12T00:00:00Z",
-				"scrape_duration_ms": 60000,
-				"scrape_error":"something went wrong"
-			}]
-		}`
-		require.JSONEq(t, expect, rr.Body.String())
-		require.Equal(t, http.StatusOK, rr.Result().StatusCode)
-	})
-}
-
-type mockInstanceScrape struct {
-	instance.NoOpInstance
-	tgts map[string][]*scrape.Target
-}
-
-func (i *mockInstanceScrape) TargetsActive() map[string][]*scrape.Target {
-	return i.tgts
-}
diff --git a/internal/static/metrics/instance/configstore/api.go b/internal/static/metrics/instance/configstore/api.go
deleted file mode 100644
index 552b6ba404..0000000000
--- a/internal/static/metrics/instance/configstore/api.go
+++ /dev/null
@@ -1,268 +0,0 @@
-package configstore
-
-import (
-	"errors"
-	"fmt"
-	"io"
-	"net/http"
-	"net/url"
-	"strings"
-	"sync"
-
-	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
-	"github.com/gorilla/mux"
-	"github.com/grafana/agent/internal/static/metrics/cluster/configapi"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/prometheus/client_golang/prometheus"
-)
-
-// API is an HTTP API to interact with a configstore.
-type API struct {
-	log       log.Logger
-	storeMut  sync.Mutex
-	store     Store
-	validator Validator
-
-	totalCreatedConfigs prometheus.Counter
-	totalUpdatedConfigs prometheus.Counter
-	totalDeletedConfigs prometheus.Counter
-
-	enableGet bool
-}
-
-// Validator validates a config before putting it into the store.
-// Validator is allowed to mutate the config and will only be given a copy.
-type Validator = func(c *instance.Config) error
-
-// NewAPI creates a new API. Store can be applied later with SetStore.
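NewAPI below also makes *API a prometheus.Collector (see Describe/Collect), so the expected wiring registers it for metrics in addition to mounting the routes. A minimal sketch, where logger and store are placeholders:

    // Sketch: wiring the configstore API; logger and store are placeholders.
    func wireConfigAPI(logger log.Logger, store Store) (*mux.Router, *prometheus.Registry) {
        api := NewAPI(logger, store, nil /* no validator */, true /* GET enabled */)
        router := mux.NewRouter()
        api.WireAPI(router)
        reg := prometheus.NewRegistry()
        reg.MustRegister(api) // exposes the created/updated/deleted counters
        return router, reg
    }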
-func NewAPI(l log.Logger, store Store, v Validator, enableGet bool) *API { - return &API{ - log: l, - store: store, - validator: v, - - totalCreatedConfigs: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "agent_metrics_ha_configs_created_total", - Help: "Total number of created scraping service configs", - }), - totalUpdatedConfigs: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "agent_metrics_ha_configs_updated_total", - Help: "Total number of updated scraping service configs", - }), - totalDeletedConfigs: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "agent_metrics_ha_configs_deleted_total", - Help: "Total number of deleted scraping service configs", - }), - enableGet: enableGet, - } -} - -// WireAPI injects routes into the provided mux router for the config -// store API. -func (api *API) WireAPI(r *mux.Router) { - // Support URL-encoded config names. The handlers will need to decode the - // name when reading the path variable. - r = r.UseEncodedPath() - - r.HandleFunc("/agent/api/v1/configs", api.ListConfigurations).Methods("GET") - getConfigHandler := messageHandlerFunc(http.StatusNotFound, "404 - config endpoint is disabled") - if api.enableGet { - getConfigHandler = api.GetConfiguration - } - r.HandleFunc("/agent/api/v1/configs/{name}", getConfigHandler).Methods("GET") - r.HandleFunc("/agent/api/v1/config/{name}", api.PutConfiguration).Methods("PUT", "POST") - r.HandleFunc("/agent/api/v1/config/{name}", api.DeleteConfiguration).Methods("DELETE") -} - -// Describe implements prometheus.Collector. -func (api *API) Describe(ch chan<- *prometheus.Desc) { - ch <- api.totalCreatedConfigs.Desc() - ch <- api.totalUpdatedConfigs.Desc() - ch <- api.totalDeletedConfigs.Desc() -} - -// Collect implements prometheus.Collector. -func (api *API) Collect(mm chan<- prometheus.Metric) { - mm <- api.totalCreatedConfigs - mm <- api.totalUpdatedConfigs - mm <- api.totalDeletedConfigs -} - -// ListConfigurations returns a list of configurations. -func (api *API) ListConfigurations(rw http.ResponseWriter, r *http.Request) { - api.storeMut.Lock() - defer api.storeMut.Unlock() - if api.store == nil { - api.writeError(rw, http.StatusNotFound, fmt.Errorf("no config store running")) - return - } - - keys, err := api.store.List(r.Context()) - if errors.Is(err, ErrNotConnected) { - api.writeError(rw, http.StatusNotFound, fmt.Errorf("no config store running")) - return - } else if err != nil { - api.writeError(rw, http.StatusInternalServerError, fmt.Errorf("failed to write config: %w", err)) - return - } - api.writeResponse(rw, http.StatusOK, configapi.ListConfigurationsResponse{Configs: keys}) -} - -// GetConfiguration gets an individual configuration. 
-func (api *API) GetConfiguration(rw http.ResponseWriter, r *http.Request) { - api.storeMut.Lock() - defer api.storeMut.Unlock() - if api.store == nil { - api.writeError(rw, http.StatusNotFound, fmt.Errorf("no config store running")) - return - } - - configKey, err := getConfigName(r) - if err != nil { - api.writeError(rw, http.StatusBadRequest, err) - return - } - - cfg, err := api.store.Get(r.Context(), configKey) - switch { - case errors.Is(err, ErrNotConnected): - api.writeError(rw, http.StatusNotFound, err) - case errors.As(err, &NotExistError{}): - api.writeError(rw, http.StatusNotFound, err) - case err != nil: - api.writeError(rw, http.StatusInternalServerError, err) - case err == nil: - bb, err := instance.MarshalConfig(&cfg, true) - if err != nil { - api.writeError(rw, http.StatusInternalServerError, fmt.Errorf("could not marshal config for response: %w", err)) - return - } - api.writeResponse(rw, http.StatusOK, &configapi.GetConfigurationResponse{ - Value: string(bb), - }) - } -} - -// PutConfiguration creates or updates a configuration. -func (api *API) PutConfiguration(rw http.ResponseWriter, r *http.Request) { - api.storeMut.Lock() - defer api.storeMut.Unlock() - if api.store == nil { - api.writeError(rw, http.StatusNotFound, fmt.Errorf("no config store running")) - return - } - - configName, err := getConfigName(r) - if err != nil { - api.writeError(rw, http.StatusBadRequest, err) - return - } - - var config strings.Builder - if _, err := io.Copy(&config, r.Body); err != nil { - api.writeError(rw, http.StatusInternalServerError, err) - return - } - - cfg, err := instance.UnmarshalConfig(strings.NewReader(config.String())) - if err != nil { - api.writeError(rw, http.StatusBadRequest, fmt.Errorf("could not unmarshal config: %w", err)) - return - } - cfg.Name = configName - - if api.validator != nil { - validateCfg, err := instance.UnmarshalConfig(strings.NewReader(config.String())) - if err != nil { - api.writeError(rw, http.StatusBadRequest, fmt.Errorf("could not unmarshal config: %w", err)) - return - } - validateCfg.Name = configName - - if err := api.validator(validateCfg); err != nil { - api.writeError(rw, http.StatusBadRequest, fmt.Errorf("failed to validate config: %w", err)) - return - } - } - - created, err := api.store.Put(r.Context(), *cfg) - switch { - case errors.Is(err, ErrNotConnected): - api.writeError(rw, http.StatusNotFound, err) - case errors.As(err, &NotUniqueError{}): - api.writeError(rw, http.StatusBadRequest, err) - case err != nil: - api.writeError(rw, http.StatusInternalServerError, err) - default: - if created { - api.totalCreatedConfigs.Inc() - api.writeResponse(rw, http.StatusCreated, nil) - } else { - api.totalUpdatedConfigs.Inc() - api.writeResponse(rw, http.StatusOK, nil) - } - } -} - -// DeleteConfiguration deletes a configuration. 
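PutConfiguration above signals create versus update purely through the status code, and validation or duplicate-job failures come back as 400s; DeleteConfiguration below uses the same envelope. A hedged client-side sketch (the function name, base URL, and cfgYAML are placeholders):

    // Hypothetical client PUT against the removed endpoint.
    func putConfig(base, name, cfgYAML string) error {
        req, err := http.NewRequest(http.MethodPut,
            base+"/agent/api/v1/config/"+url.PathEscape(name), strings.NewReader(cfgYAML))
        if err != nil {
            return err
        }
        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            return err
        }
        defer resp.Body.Close()
        switch resp.StatusCode {
        case http.StatusCreated:
            return nil // 201: config newly created
        case http.StatusOK:
            return nil // 200: existing config updated
        default:
            return fmt.Errorf("unexpected status %d", resp.StatusCode) // e.g. 400 on invalid config
        }
    }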
-func (api *API) DeleteConfiguration(rw http.ResponseWriter, r *http.Request) { - api.storeMut.Lock() - defer api.storeMut.Unlock() - if api.store == nil { - api.writeError(rw, http.StatusNotFound, fmt.Errorf("no config store running")) - return - } - - configKey, err := getConfigName(r) - if err != nil { - api.writeError(rw, http.StatusBadRequest, err) - return - } - - err = api.store.Delete(r.Context(), configKey) - switch { - case errors.Is(err, ErrNotConnected): - api.writeError(rw, http.StatusNotFound, err) - case errors.As(err, &NotExistError{}): - api.writeError(rw, http.StatusNotFound, err) - case err != nil: - api.writeError(rw, http.StatusInternalServerError, err) - default: - api.totalDeletedConfigs.Inc() - api.writeResponse(rw, http.StatusOK, nil) - } -} - -func (api *API) writeError(rw http.ResponseWriter, statusCode int, writeErr error) { - err := configapi.WriteError(rw, statusCode, writeErr) - if err != nil { - level.Error(api.log).Log("msg", "failed to write response", "err", err) - } -} - -func (api *API) writeResponse(rw http.ResponseWriter, statusCode int, v interface{}) { - err := configapi.WriteResponse(rw, statusCode, v) - if err != nil { - level.Error(api.log).Log("msg", "failed to write response", "err", err) - } -} - -// getConfigName uses gorilla/mux's route variables to extract the -// "name" variable. If not found, getConfigName will return an error. -func getConfigName(r *http.Request) (string, error) { - vars := mux.Vars(r) - name := vars["name"] - name, err := url.PathUnescape(name) - if err != nil { - return "", fmt.Errorf("could not decode config name: %w", err) - } - return name, nil -} - -func messageHandlerFunc(statusCode int, msg string) http.HandlerFunc { - return func(rw http.ResponseWriter, r *http.Request) { - rw.WriteHeader(statusCode) - _, _ = rw.Write([]byte(msg)) - } -} diff --git a/internal/static/metrics/instance/configstore/api_test.go b/internal/static/metrics/instance/configstore/api_test.go deleted file mode 100644 index 71bd558810..0000000000 --- a/internal/static/metrics/instance/configstore/api_test.go +++ /dev/null @@ -1,408 +0,0 @@ -package configstore - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "net/http/httptest" - "strings" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/static/client" - "github.com/grafana/agent/internal/static/metrics/cluster/configapi" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestAPI_ListConfigurations(t *testing.T) { - s := &Mock{ - ListFunc: func(ctx context.Context) ([]string, error) { - return []string{"a", "b", "c"}, nil - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - resp, err := http.Get(env.srv.URL + "/agent/api/v1/configs") - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - - expect := `{ - "status": "success", - "data": { - "configs": ["a", "b", "c"] - } - }` - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.JSONEq(t, expect, string(body)) - - t.Run("With Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - apiResp, err := cli.ListConfigs(context.Background()) - require.NoError(t, err) - - expect := &configapi.ListConfigurationsResponse{Configs: []string{"a", "b", "c"}} - require.Equal(t, expect, apiResp) - }) -} - -func TestAPI_GetConfiguration_Invalid(t *testing.T) { - s 
:= &Mock{ - GetFunc: func(ctx context.Context, key string) (instance.Config, error) { - return instance.Config{}, NotExistError{Key: key} - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - resp, err := http.Get(env.srv.URL + "/agent/api/v1/configs/does-not-exist") - require.NoError(t, err) - require.Equal(t, http.StatusNotFound, resp.StatusCode) - - expect := `{ - "status": "error", - "data": { - "error": "configuration does-not-exist does not exist" - } - }` - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.JSONEq(t, expect, string(body)) - - t.Run("With Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - _, err := cli.GetConfiguration(context.Background(), "does-not-exist") - require.NotNil(t, err) - require.Equal(t, "configuration does-not-exist does not exist", err.Error()) - }) -} - -func TestAPI_GetConfiguration(t *testing.T) { - s := &Mock{ - GetFunc: func(ctx context.Context, key string) (instance.Config, error) { - return instance.Config{ - Name: key, - HostFilter: true, - RemoteFlushDeadline: 10 * time.Minute, - }, nil - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - resp, err := http.Get(env.srv.URL + "/agent/api/v1/configs/exists") - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - - expect := `{ - "status": "success", - "data": { - "value": "name: exists\nhost_filter: true\nremote_flush_deadline: 10m0s\n" - } - }` - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.JSONEq(t, expect, string(body)) - - t.Run("With Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - actual, err := cli.GetConfiguration(context.Background(), "exists") - require.NoError(t, err) - - // The client will apply defaults, so we need to start with the DefaultConfig - // as a base here. 
- expect := instance.DefaultConfig - expect.Name = "exists" - expect.HostFilter = true - expect.RemoteFlushDeadline = 10 * time.Minute - require.Equal(t, &expect, actual) - }) -} - -func TestAPI_GetConfiguration_ScrubSecrets(t *testing.T) { - rawConfig := `name: exists -scrape_configs: -- job_name: local_scrape - follow_redirects: true - enable_http2: true - honor_timestamps: true - metrics_path: /metrics - scheme: http - track_timestamps_staleness: true - static_configs: - - targets: - - 127.0.0.1:12345 - labels: - cluster: localhost - basic_auth: - username: admin - password: SCRUBME -remote_write: -- url: http://localhost:9009/api/prom/push - remote_timeout: 30s - name: test-d0f32c - send_exemplars: true - basic_auth: - username: admin - password: SCRUBME - queue_config: - capacity: 500 - max_shards: 1000 - min_shards: 1 - max_samples_per_send: 100 - batch_send_deadline: 5s - min_backoff: 30ms - max_backoff: 100ms - retry_on_http_429: true - follow_redirects: true - enable_http2: true - metadata_config: - send: true - send_interval: 1m - max_samples_per_send: 500 -wal_truncate_frequency: 1m0s -min_wal_time: 5m0s -max_wal_time: 4h0m0s -remote_flush_deadline: 1m0s -` - scrubbedConfig := strings.ReplaceAll(rawConfig, "SCRUBME", "") - - s := &Mock{ - GetFunc: func(ctx context.Context, key string) (instance.Config, error) { - c, err := instance.UnmarshalConfig(strings.NewReader(rawConfig)) - if err != nil { - return instance.Config{}, err - } - return *c, nil - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - resp, err := http.Get(env.srv.URL + "/agent/api/v1/configs/exists") - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - respBytes, err := io.ReadAll(resp.Body) - require.NoError(t, err) - - var apiResp struct { - Status string `json:"status"` - Data struct { - Value string `json:"value"` - } `json:"data"` - } - err = json.Unmarshal(respBytes, &apiResp) - require.NoError(t, err) - require.Equal(t, "success", apiResp.Status) - require.YAMLEq(t, scrubbedConfig, apiResp.Data.Value) - - t.Run("With Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - actual, err := cli.GetConfiguration(context.Background(), "exists") - require.NoError(t, err) - - // Marshal the retrieved config _without_ scrubbing. This means - // that if the secrets weren't scrubbed from GetConfiguration, something - // bad happened at the API level. 
- actualBytes, err := instance.MarshalConfig(actual, false) - require.NoError(t, err) - require.YAMLEq(t, scrubbedConfig, string(actualBytes)) - }) -} - -func TestServer_GetConfiguration_Disabled(t *testing.T) { - api := NewAPI(log.NewNopLogger(), nil, nil, false) - env := newAPITestEnvironment(t, api) - resp, err := http.Get(env.srv.URL + "/agent/api/v1/configs/exists") - require.NoError(t, err) - require.Equal(t, http.StatusNotFound, resp.StatusCode) - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.Equal(t, []byte("404 - config endpoint is disabled"), body) -} - -func TestServer_PutConfiguration(t *testing.T) { - var s Mock - - api := NewAPI(log.NewNopLogger(), &s, nil, true) - env := newAPITestEnvironment(t, api) - - cfg := instance.Config{Name: "newconfig"} - bb, err := instance.MarshalConfig(&cfg, false) - require.NoError(t, err) - - t.Run("Created", func(t *testing.T) { - // Created configs should return http.StatusCreated - s.PutFunc = func(ctx context.Context, c instance.Config) (created bool, err error) { - return true, nil - } - - resp, err := http.Post(env.srv.URL+"/agent/api/v1/config/newconfig", "", bytes.NewReader(bb)) - require.NoError(t, err) - require.Equal(t, http.StatusCreated, resp.StatusCode) - }) - - t.Run("Updated", func(t *testing.T) { - // Updated configs should return http.StatusOK - s.PutFunc = func(ctx context.Context, c instance.Config) (created bool, err error) { - return false, nil - } - - resp, err := http.Post(env.srv.URL+"/agent/api/v1/config/newconfig", "", bytes.NewReader(bb)) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - }) -} - -func TestServer_PutConfiguration_Invalid(t *testing.T) { - var s Mock - - api := NewAPI(log.NewNopLogger(), &s, func(c *instance.Config) error { - return fmt.Errorf("custom validation error") - }, true) - env := newAPITestEnvironment(t, api) - - cfg := instance.Config{Name: "newconfig"} - bb, err := instance.MarshalConfig(&cfg, false) - require.NoError(t, err) - - resp, err := http.Post(env.srv.URL+"/agent/api/v1/config/newconfig", "", bytes.NewReader(bb)) - require.NoError(t, err) - require.Equal(t, http.StatusBadRequest, resp.StatusCode) - - expect := `{ - "status": "error", - "data": { - "error": "failed to validate config: custom validation error" - } - }` - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.JSONEq(t, expect, string(body)) -} - -func TestServer_PutConfiguration_WithClient(t *testing.T) { - var s Mock - api := NewAPI(log.NewNopLogger(), &s, nil, true) - env := newAPITestEnvironment(t, api) - - cfg := instance.DefaultConfig - cfg.Name = "newconfig-withclient" - cfg.HostFilter = true - cfg.RemoteFlushDeadline = 10 * time.Minute - - s.PutFunc = func(ctx context.Context, c instance.Config) (created bool, err error) { - assert.Equal(t, cfg, c) - return true, nil - } - - cli := client.New(env.srv.URL) - err := cli.PutConfiguration(context.Background(), "newconfig-withclient", &cfg) - require.NoError(t, err) -} - -func TestServer_DeleteConfiguration(t *testing.T) { - s := &Mock{ - DeleteFunc: func(ctx context.Context, key string) error { - assert.Equal(t, "deleteme", key) - return nil - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - req, err := http.NewRequest(http.MethodDelete, env.srv.URL+"/agent/api/v1/config/deleteme", nil) - require.NoError(t, err) - resp, err := http.DefaultClient.Do(req) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - - t.Run("With 
Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - err := cli.DeleteConfiguration(context.Background(), "deleteme") - require.NoError(t, err) - }) -} - -func TestServer_DeleteConfiguration_Invalid(t *testing.T) { - s := &Mock{ - DeleteFunc: func(ctx context.Context, key string) error { - assert.Equal(t, "deleteme", key) - return NotExistError{Key: key} - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - req, err := http.NewRequest(http.MethodDelete, env.srv.URL+"/agent/api/v1/config/deleteme", nil) - require.NoError(t, err) - resp, err := http.DefaultClient.Do(req) - require.NoError(t, err) - require.Equal(t, http.StatusNotFound, resp.StatusCode) - - t.Run("With Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - err := cli.DeleteConfiguration(context.Background(), "deleteme") - require.Error(t, err) - }) -} - -func TestServer_URLEncoded(t *testing.T) { - var s Mock - - api := NewAPI(log.NewNopLogger(), &s, nil, true) - env := newAPITestEnvironment(t, api) - - var cfg instance.Config - bb, err := instance.MarshalConfig(&cfg, false) - require.NoError(t, err) - - s.PutFunc = func(ctx context.Context, c instance.Config) (created bool, err error) { - assert.Equal(t, "url/encoded", c.Name) - return true, nil - } - - resp, err := http.Post(env.srv.URL+"/agent/api/v1/config/url%2Fencoded", "", bytes.NewReader(bb)) - require.NoError(t, err) - require.Equal(t, http.StatusCreated, resp.StatusCode) - - s.GetFunc = func(ctx context.Context, key string) (instance.Config, error) { - assert.Equal(t, "url/encoded", key) - return instance.Config{Name: "url/encoded"}, nil - } - - resp, err = http.Get(env.srv.URL + "/agent/api/v1/configs/url%2Fencoded") - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) -} - -type apiTestEnvironment struct { - srv *httptest.Server - router *mux.Router -} - -func newAPITestEnvironment(t *testing.T, api *API) apiTestEnvironment { - t.Helper() - - router := mux.NewRouter() - srv := httptest.NewServer(router) - t.Cleanup(srv.Close) - - api.WireAPI(router) - - return apiTestEnvironment{srv: srv, router: router} -} diff --git a/internal/static/metrics/instance/configstore/codec.go b/internal/static/metrics/instance/configstore/codec.go deleted file mode 100644 index 38a837c5be..0000000000 --- a/internal/static/metrics/instance/configstore/codec.go +++ /dev/null @@ -1,65 +0,0 @@ -package configstore - -import ( - "bytes" - "compress/gzip" - "fmt" - "io" - "strings" - - "github.com/grafana/dskit/kv/codec" -) - -// GetCodec returns the codec for encoding and decoding instance.Configs -// in the Remote store. -func GetCodec() codec.Codec { - return &yamlCodec{} -} - -type yamlCodec struct{} - -func (*yamlCodec) Decode(bb []byte) (interface{}, error) { - // Decode is called by kv.Clients with an empty slice when a - // key is deleted. We should stop early here and don't return - // an error so the deletion event propagates to watchers. 
- if len(bb) == 0 { - return nil, nil - } - - r, err := gzip.NewReader(bytes.NewReader(bb)) - if err != nil { - return nil, err - } - - var sb strings.Builder - if _, err := io.Copy(&sb, r); err != nil { - return nil, err - } - return sb.String(), nil -} - -func (*yamlCodec) Encode(v interface{}) ([]byte, error) { - var buf bytes.Buffer - - var cfg string - - switch v := v.(type) { - case string: - cfg = v - default: - panic(fmt.Sprintf("unexpected type %T passed to yamlCodec.Encode", v)) - } - - w := gzip.NewWriter(&buf) - - if _, err := io.Copy(w, strings.NewReader(cfg)); err != nil { - return nil, err - } - - w.Close() - return buf.Bytes(), nil -} - -func (*yamlCodec) CodecID() string { - return "agentConfig/yaml" -} diff --git a/internal/static/metrics/instance/configstore/codec_test.go b/internal/static/metrics/instance/configstore/codec_test.go deleted file mode 100644 index ab717c0bb6..0000000000 --- a/internal/static/metrics/instance/configstore/codec_test.go +++ /dev/null @@ -1,41 +0,0 @@ -package configstore - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestCodec(t *testing.T) { - exampleConfig := `name: 'test' -host_filter: false -scrape_configs: - - job_name: process-1 - static_configs: - - targets: ['process-1:80'] - labels: - cluster: 'local' - origin: 'agent'` - - c := &yamlCodec{} - bb, err := c.Encode(exampleConfig) - require.NoError(t, err) - - out, err := c.Decode(bb) - require.NoError(t, err) - require.Equal(t, exampleConfig, out) -} - -// TestCodec_Decode_Nil makes sure that if Decode is called with an empty value, -// which may happen when a key is deleted, that no error occurs and instead a -// nil value is returned. -func TestCodec_Decode_Nil(t *testing.T) { - c := &yamlCodec{} - - input := [][]byte{nil, make([]byte, 0)} - for _, bb := range input { - out, err := c.Decode(bb) - require.Nil(t, err) - require.Nil(t, out) - } -} diff --git a/internal/static/metrics/instance/configstore/errors.go b/internal/static/metrics/instance/configstore/errors.go deleted file mode 100644 index 8d668a5ee3..0000000000 --- a/internal/static/metrics/instance/configstore/errors.go +++ /dev/null @@ -1,27 +0,0 @@ -package configstore - -import "fmt" - -// ErrNotConnected is used when a store operation was called but no connection -// to the store was active. -var ErrNotConnected = fmt.Errorf("not connected to store") - -// NotExistError is used when a config doesn't exist. -type NotExistError struct { - Key string -} - -// Error implements error. -func (e NotExistError) Error() string { - return fmt.Sprintf("configuration %s does not exist", e.Key) -} - -// NotUniqueError is used when two scrape jobs have the same name. -type NotUniqueError struct { - ScrapeJob string -} - -// Error implements error. -func (e NotUniqueError) Error() string { - return fmt.Sprintf("found multiple scrape configs in config store with job name %q", e.ScrapeJob) -} diff --git a/internal/static/metrics/instance/configstore/mock.go b/internal/static/metrics/instance/configstore/mock.go deleted file mode 100644 index 5ff303669c..0000000000 --- a/internal/static/metrics/instance/configstore/mock.go +++ /dev/null @@ -1,74 +0,0 @@ -package configstore - -import ( - "context" - - "github.com/grafana/agent/internal/static/metrics/instance" -) - -// Mock is a Mock Store. Useful primarily for testing. 
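The Mock store that follows uses the function-field pattern: every method delegates to an optional func field and panics when a test exercises a path it never stubbed, failing loudly rather than silently returning zero values. A minimal sketch against a hypothetical one-method Store interface:

package main

import (
	"context"
	"fmt"
)

// Store is a stand-in for the configstore.Store interface.
type Store interface {
	Get(ctx context.Context, key string) (string, error)
}

// MockStore implements Store by delegating to optional function fields.
// An unstubbed method panics, so a test fails loudly if it hits an
// unexpected code path.
type MockStore struct {
	GetFunc func(ctx context.Context, key string) (string, error)
}

func (m *MockStore) Get(ctx context.Context, key string) (string, error) {
	if m.GetFunc != nil {
		return m.GetFunc(ctx, key)
	}
	panic("Get not implemented")
}

func main() {
	s := &MockStore{
		GetFunc: func(_ context.Context, key string) (string, error) {
			return "name: " + key, nil
		},
	}
	var store Store = s
	v, _ := store.Get(context.Background(), "demo")
	fmt.Println(v) // name: demo
}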
-type Mock struct { - ListFunc func(ctx context.Context) ([]string, error) - GetFunc func(ctx context.Context, key string) (instance.Config, error) - PutFunc func(ctx context.Context, c instance.Config) (created bool, err error) - DeleteFunc func(ctx context.Context, key string) error - AllFunc func(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) - WatchFunc func() <-chan WatchEvent - CloseFunc func() error -} - -// List implements Store. -func (s *Mock) List(ctx context.Context) ([]string, error) { - if s.ListFunc != nil { - return s.ListFunc(ctx) - } - panic("List not implemented") -} - -// Get implements Store. -func (s *Mock) Get(ctx context.Context, key string) (instance.Config, error) { - if s.GetFunc != nil { - return s.GetFunc(ctx, key) - } - panic("Get not implemented") -} - -// Put implements Store. -func (s *Mock) Put(ctx context.Context, c instance.Config) (created bool, err error) { - if s.PutFunc != nil { - return s.PutFunc(ctx, c) - } - panic("Put not implemented") -} - -// Delete implements Store. -func (s *Mock) Delete(ctx context.Context, key string) error { - if s.DeleteFunc != nil { - return s.DeleteFunc(ctx, key) - } - panic("Delete not implemented") -} - -// All implements Store. -func (s *Mock) All(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - if s.AllFunc != nil { - return s.AllFunc(ctx, keep) - } - panic("All not implemented") -} - -// Watch implements Store. -func (s *Mock) Watch() <-chan WatchEvent { - if s.WatchFunc != nil { - return s.WatchFunc() - } - panic("Watch not implemented") -} - -// Close implements Store. -func (s *Mock) Close() error { - if s.CloseFunc != nil { - return s.CloseFunc() - } - panic("Close not implemented") -} diff --git a/internal/static/metrics/instance/configstore/remote.go b/internal/static/metrics/instance/configstore/remote.go deleted file mode 100644 index a97df01e25..0000000000 --- a/internal/static/metrics/instance/configstore/remote.go +++ /dev/null @@ -1,471 +0,0 @@ -package configstore - -import ( - "context" - "errors" - "fmt" - "net/http" - "strings" - "sync" - - "github.com/grafana/dskit/instrument" - - "github.com/hashicorp/go-cleanhttp" - - "github.com/hashicorp/consul/api" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/kv" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" -) - -/*********************************************************************************************************************** -The consul code skipping the cortex handler is due to performance issue with a large number of configs and overloading -consul. See issue https://github.com/grafana/agent/issues/789. The long term method will be to refactor and extract -the cortex code so other stores can also benefit from this. @mattdurham -***********************************************************************************************************************/ - -var consulRequestDuration = instrument.NewHistogramCollector(promauto.NewHistogramVec(prometheus.HistogramOpts{ - Name: "agent_configstore_consul_request_duration_seconds", - Help: "Time spent on consul requests when listing configs.", - Buckets: prometheus.DefBuckets, -}, []string{"operation", "status_code"})) - -// Remote loads instance files from a remote KV store. The KV store -// can be swapped out in real time. 
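The "swapped out in real time" behavior comes from guarding the client pointer with a sync.RWMutex: store operations take the read lock and fail with ErrNotConnected when the pointer is nil, while ApplyConfig takes the write lock to replace it. The shape in isolation (client and Do are illustrative names):

package main

import (
	"errors"
	"fmt"
	"sync"
)

var errNotConnected = errors.New("not connected to store")

type client struct{ name string }

type swappable struct {
	mut sync.RWMutex
	cli *client // nil means the store is disabled
}

// Apply replaces the active client; concurrent readers see either the
// old or the new client, never a torn value.
func (s *swappable) Apply(c *client) {
	s.mut.Lock()
	defer s.mut.Unlock()
	s.cli = c
}

func (s *swappable) Do() (string, error) {
	s.mut.RLock()
	defer s.mut.RUnlock()
	if s.cli == nil {
		return "", errNotConnected
	}
	return s.cli.name, nil
}

func main() {
	var s swappable
	if _, err := s.Do(); err != nil {
		fmt.Println(err) // not connected to store
	}
	s.Apply(&client{name: "consul"})
	v, _ := s.Do()
	fmt.Println(v) // consul
}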
-type Remote struct { - log log.Logger - reg *util.Unregisterer - - kvMut sync.RWMutex - kv *agentRemoteClient - reloadKV chan struct{} - - cancelCtx context.Context - cancelFunc context.CancelFunc - - configsMut sync.Mutex - configsCh chan WatchEvent -} - -// agentRemoteClient is a simple wrapper to allow the shortcircuit of consul, while being backwards compatible with non -// consul kv stores -type agentRemoteClient struct { - kv.Client - consul *api.Client - config kv.Config -} - -// NewRemote creates a new Remote store that uses a Key-Value client to store -// and retrieve configs. If enable is true, the store will be immediately -// connected to. Otherwise, it can be lazily loaded by enabling later through -// a call to Remote.ApplyConfig. -func NewRemote(l log.Logger, reg prometheus.Registerer, cfg kv.Config, enable bool) (*Remote, error) { - cancelCtx, cancelFunc := context.WithCancel(context.Background()) - - r := &Remote{ - log: l, - reg: util.WrapWithUnregisterer(reg), - - reloadKV: make(chan struct{}, 1), - - cancelCtx: cancelCtx, - cancelFunc: cancelFunc, - - configsCh: make(chan WatchEvent), - } - if err := r.ApplyConfig(cfg, enable); err != nil { - return nil, fmt.Errorf("failed to apply config for config store: %w", err) - } - - go r.run() - return r, nil -} - -// ApplyConfig applies the config for a kv client. -func (r *Remote) ApplyConfig(cfg kv.Config, enable bool) error { - r.kvMut.Lock() - defer r.kvMut.Unlock() - - if r.cancelCtx.Err() != nil { - return fmt.Errorf("remote store already stopped") - } - - // Unregister all metrics that the previous kv may have registered. - r.reg.UnregisterAll() - - if !enable { - r.setClient(nil, nil, kv.Config{}) - return nil - } - - cli, err := kv.NewClient(cfg, GetCodec(), kv.RegistererWithKVName(r.reg, "agent_configs"), r.log) - // This is a hack to get a consul client, the client above has it embedded but it's not exposed - var consulClient *api.Client - if cfg.Store == "consul" { - consulClient, err = api.NewClient(&api.Config{ - Address: cfg.Consul.Host, - Token: cfg.Consul.ACLToken.String(), - Scheme: "http", - HttpClient: &http.Client{ - Transport: cleanhttp.DefaultPooledTransport(), - // See https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/ - Timeout: cfg.Consul.HTTPClientTimeout, - }, - }) - if err != nil { - return err - } - } - - if err != nil { - return fmt.Errorf("failed to create kv client: %w", err) - } - - r.setClient(cli, consulClient, cfg) - return nil -} - -// setClient sets the active client and notifies run to restart the -// kv watcher. -func (r *Remote) setClient(client kv.Client, consulClient *api.Client, config kv.Config) { - if client == nil && consulClient == nil { - r.kv = nil - } else { - r.kv = &agentRemoteClient{ - Client: client, - consul: consulClient, - config: config, - } - } - r.reloadKV <- struct{}{} -} - -func (r *Remote) run() { - var ( - kvContext context.Context - kvCancel context.CancelFunc - ) - -Outer: - for { - select { - case <-r.cancelCtx.Done(): - break Outer - case <-r.reloadKV: - r.kvMut.RLock() - kv := r.kv - r.kvMut.RUnlock() - - if kvCancel != nil { - kvCancel() - } - kvContext, kvCancel = context.WithCancel(r.cancelCtx) - go r.watchKV(kvContext, kv) - } - } - - if kvCancel != nil { - kvCancel() - } -} - -func (r *Remote) watchKV(ctx context.Context, client *agentRemoteClient) { - // Edge case: client was unset, nothing to do here. 
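The run loop above restarts the watch goroutine whenever setClient signals reloadKV, cancelling the previous watcher's context first so at most one watcher is ever active. A self-contained sketch of that restart pattern (watch and the string payloads are illustrative):

package main

import (
	"context"
	"fmt"
	"time"
)

// run restarts a watcher goroutine whenever a reload signal arrives,
// cancelling the previous watcher's context before starting a new one.
func run(ctx context.Context, reload <-chan string) {
	var cancel context.CancelFunc
	for {
		select {
		case <-ctx.Done():
			if cancel != nil {
				cancel()
			}
			return
		case name := <-reload:
			if cancel != nil {
				cancel() // stop the previous watcher
			}
			var wctx context.Context
			wctx, cancel = context.WithCancel(ctx)
			go watch(wctx, name)
		}
	}
}

func watch(ctx context.Context, name string) {
	fmt.Println("watching", name)
	<-ctx.Done()
	fmt.Println("stopped", name)
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	reload := make(chan string)
	go run(ctx, reload)

	reload <- "store-a"
	time.Sleep(50 * time.Millisecond)
	reload <- "store-b" // cancels the store-a watcher first
	time.Sleep(50 * time.Millisecond)
	cancel()
	time.Sleep(50 * time.Millisecond)
}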
- if client == nil { - level.Info(r.log).Log("msg", "not watching the KV, none set") - return - } - - client.WatchPrefix(ctx, "", func(key string, v interface{}) bool { - if ctx.Err() != nil { - return false - } - - r.configsMut.Lock() - defer r.configsMut.Unlock() - - switch { - case v == nil: - r.configsCh <- WatchEvent{Key: key, Config: nil} - default: - cfg, err := instance.UnmarshalConfig(strings.NewReader(v.(string))) - if err != nil { - level.Error(r.log).Log("msg", "could not unmarshal config from store", "name", key, "err", err) - break - } - - r.configsCh <- WatchEvent{Key: key, Config: cfg} - } - - return true - }) -} - -// List returns the list of all configs in the KV store. -func (r *Remote) List(ctx context.Context) ([]string, error) { - r.kvMut.RLock() - defer r.kvMut.RUnlock() - if r.kv == nil { - return nil, ErrNotConnected - } - - return r.kv.List(ctx, "") -} - -// listConsul returns Key Value Pairs instead of []string -func (r *Remote) listConsul(ctx context.Context) (api.KVPairs, error) { - if r.kv == nil { - return nil, ErrNotConnected - } - - var pairs api.KVPairs - options := &api.QueryOptions{ - AllowStale: !r.kv.config.Consul.ConsistentReads, - RequireConsistent: r.kv.config.Consul.ConsistentReads, - } - // This is copied from cortex list so that stats stay the same - err := instrument.CollectedRequest(ctx, "List", consulRequestDuration, instrument.ErrorCode, func(ctx context.Context) error { - var err error - pairs, _, err = r.kv.consul.KV().List(r.kv.config.Prefix, options.WithContext(ctx)) - return err - }) - - if err != nil { - return nil, err - } - // This mirrors the previous behavior of returning a blank array as opposed to nil. - if pairs == nil { - blankPairs := make(api.KVPairs, 0) - return blankPairs, nil - } - for _, kvp := range pairs { - kvp.Key = strings.TrimPrefix(kvp.Key, r.kv.config.Prefix) - } - return pairs, nil -} - -// Get retrieves an individual config from the KV store. -func (r *Remote) Get(ctx context.Context, key string) (instance.Config, error) { - r.kvMut.RLock() - defer r.kvMut.RUnlock() - if r.kv == nil { - return instance.Config{}, ErrNotConnected - } - - v, err := r.kv.Get(ctx, key) - if err != nil { - return instance.Config{}, fmt.Errorf("failed to get config %s: %w", key, err) - } else if v == nil { - return instance.Config{}, NotExistError{Key: key} - } - - cfg, err := instance.UnmarshalConfig(strings.NewReader(v.(string))) - if err != nil { - return instance.Config{}, fmt.Errorf("failed to unmarshal config %s: %w", key, err) - } - return *cfg, nil -} - -// Put adds or updates a config in the KV store. -func (r *Remote) Put(ctx context.Context, c instance.Config) (bool, error) { - // We need to use a write lock here since two Applies can't run concurrently - // (given the current need to perform a store-wide validation.) 
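Put, continued below, reports created-vs-updated by inspecting the previous value inside the CAS callback; that flag is what drives the HTTP 201 vs. 200 split exercised by the API tests earlier in this patch. A toy sketch of the same technique, with a hypothetical single-node CAS primitive standing in for the kv client:

package main

import "fmt"

// store is a toy stand-in for a KV client's CAS primitive: the callback
// receives the current value (nil if absent) and returns the new one.
type store map[string]string

func (s store) CAS(key string, f func(in interface{}) (out interface{}, err error)) error {
	var in interface{}
	if v, ok := s[key]; ok {
		in = v
	}
	out, err := f(in)
	if err != nil {
		return err
	}
	s[key] = out.(string)
	return nil
}

// put reports created=true when no previous value existed for the key.
func put(s store, key, value string) (created bool, err error) {
	err = s.CAS(key, func(in interface{}) (interface{}, error) {
		created = in == nil
		return value, nil
	})
	return created, err
}

func main() {
	s := store{}
	c, _ := put(s, "cfg", "v1")
	fmt.Println(c) // true: created
	c, _ = put(s, "cfg", "v2")
	fmt.Println(c) // false: updated
}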
- r.kvMut.Lock() - defer r.kvMut.Unlock() - if r.kv == nil { - return false, ErrNotConnected - } - - bb, err := instance.MarshalConfig(&c, false) - if err != nil { - return false, fmt.Errorf("failed to marshal config: %w", err) - } - - cfgCh, err := r.all(ctx, nil) - if err != nil { - return false, fmt.Errorf("failed to check validity of config: %w", err) - } - if err := checkUnique(cfgCh, &c); err != nil { - return false, fmt.Errorf("failed to check uniqueness of config: %w", err) - } - - var created bool - err = r.kv.CAS(ctx, c.Name, func(in interface{}) (out interface{}, retry bool, err error) { - // The configuration is new if there's no previous value from the CAS - created = (in == nil) - return string(bb), false, nil - }) - if err != nil { - return false, fmt.Errorf("failed to put config: %w", err) - } - return created, nil -} - -// Delete deletes a config from the KV store. It returns NotExistError if -// the config doesn't exist. -func (r *Remote) Delete(ctx context.Context, key string) error { - r.kvMut.RLock() - defer r.kvMut.RUnlock() - if r.kv == nil { - return ErrNotConnected - } - - // Some KV stores don't return an error if something failed to be - // deleted, so we'll try to get it first. This isn't perfect, and - // it may fail, so we'll silently ignore any errors here unless - // we know for sure the config doesn't exist. - v, err := r.kv.Get(ctx, key) - if err != nil { - level.Warn(r.log).Log("msg", "error validating key existence for deletion", "err", err) - } else if v == nil { - return NotExistError{Key: key} - } - - err = r.kv.Delete(ctx, key) - if err != nil { - return fmt.Errorf("error deleting configuration: %w", err) - } - - return nil -} - -// All retrieves the set of all configs in the store. -func (r *Remote) All(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - r.kvMut.RLock() - defer r.kvMut.RUnlock() - return r.all(ctx, keep) -} - -// all can only be called if the kvMut lock is already held. -func (r *Remote) all(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - if r.kv == nil { - return nil, ErrNotConnected - } - - // If we are using a consul client then do the short circuit way, this is done so that we receive all the key value pairs - // in one call then, operate on them in memory. Previously we retrieved the list (which stripped the values) - // then ran a goroutine to get each individual value from consul. In situations with an extremely large number of - // configs this overloaded the consul instances. This reduces that to one call, that was being made anyways. - if r.kv.consul != nil { - return r.allConsul(ctx, keep) - } - - return r.allOther(ctx, keep) -} - -// allConsul is ONLY usable when consul is the keystore. This is a performance improvement in using the client directly -// -// instead of the cortex multi store kv interface. That interface returns the list then each value must be retrieved -// individually. 
This returns all the keys and values in one call and works on them in memory -func (r *Remote) allConsul(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - if r.kv.consul == nil { - level.Error(r.log).Log("err", "allConsul called but consul client nil") - return nil, errors.New("allConsul called but consul client nil") - } - var configs []*instance.Config - c := GetCodec() - - pairs, err := r.listConsul(ctx) - - if err != nil { - return nil, err - } - for _, kvp := range pairs { - if keep != nil && !keep(kvp.Key) { - level.Debug(r.log).Log("msg", "skipping key that was filtered out", "key", kvp.Key) - continue - } - value, err := c.Decode(kvp.Value) - if err != nil { - level.Error(r.log).Log("msg", "failed to decode config from store", "key", kvp.Key, "err", err) - continue - } - if value == nil { - // Config was deleted since we called list, skip it. - level.Debug(r.log).Log("msg", "skipping key that was deleted after list was called", "key", kvp.Key) - continue - } - - cfg, err := instance.UnmarshalConfig(strings.NewReader(value.(string))) - if err != nil { - level.Error(r.log).Log("msg", "failed to unmarshal config from store", "key", kvp.Key, "err", err) - continue - } - configs = append(configs, cfg) - } - ch := make(chan instance.Config, len(configs)) - for _, cfg := range configs { - ch <- *cfg - } - close(ch) - return ch, nil -} - -func (r *Remote) allOther(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - if r.kv == nil { - return nil, ErrNotConnected - } - - keys, err := r.kv.List(ctx, "") - if err != nil { - return nil, fmt.Errorf("failed to list configs: %w", err) - } - - ch := make(chan instance.Config) - - var wg sync.WaitGroup - wg.Add(len(keys)) - go func() { - wg.Wait() - close(ch) - }() - - for _, key := range keys { - go func(key string) { - defer wg.Done() - - if keep != nil && !keep(key) { - level.Debug(r.log).Log("msg", "skipping key that was filtered out", "key", key) - return - } - - // TODO(rfratto): retries might be useful here - v, err := r.kv.Get(ctx, key) - if err != nil { - level.Error(r.log).Log("msg", "failed to get config with key", "key", key, "err", err) - return - } else if v == nil { - // Config was deleted since we called list, skip it. - level.Debug(r.log).Log("msg", "skipping key that was deleted after list was called", "key", key) - return - } - - cfg, err := instance.UnmarshalConfig(strings.NewReader(v.(string))) - if err != nil { - level.Error(r.log).Log("msg", "failed to unmarshal config from store", "key", key, "err", err) - return - } - ch <- *cfg - }(key) - } - - return ch, nil -} - -// Watch watches the Store for changes. -func (r *Remote) Watch() <-chan WatchEvent { - return r.configsCh -} - -// Close closes the Remote store. 
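allOther above uses a common Go fan-out shape: list the keys, fetch each one in its own goroutine, and close the results channel from a separate goroutine once the WaitGroup drains, skipping keys that fail or disappear rather than failing the whole operation. A minimal sketch with a hypothetical get function:

package main

import (
	"fmt"
	"sync"
)

// fetchAll fans out one goroutine per key, sends results on a channel,
// and closes the channel once the WaitGroup drains. Missing keys are
// skipped instead of failing the whole operation.
func fetchAll(keys []string, get func(string) (string, bool)) <-chan string {
	ch := make(chan string)

	var wg sync.WaitGroup
	wg.Add(len(keys))
	go func() {
		wg.Wait()
		close(ch)
	}()

	for _, key := range keys {
		go func(key string) {
			defer wg.Done()
			v, ok := get(key)
			if !ok {
				return // deleted between List and Get; skip it
			}
			ch <- v
		}(key)
	}
	return ch
}

func main() {
	data := map[string]string{"a": "cfg-a", "b": "cfg-b"}
	get := func(k string) (string, bool) { v, ok := data[k]; return v, ok }

	for v := range fetchAll([]string{"a", "b", "missing"}, get) {
		fmt.Println(v)
	}
}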
-func (r *Remote) Close() error { - r.kvMut.Lock() - defer r.kvMut.Unlock() - r.cancelFunc() - return nil -} diff --git a/internal/static/metrics/instance/configstore/remote_test.go b/internal/static/metrics/instance/configstore/remote_test.go deleted file mode 100644 index 682438dd27..0000000000 --- a/internal/static/metrics/instance/configstore/remote_test.go +++ /dev/null @@ -1,271 +0,0 @@ -package configstore - -import ( - "context" - "fmt" - "sort" - "strings" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/kv" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" -) - -func TestRemote_List(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - cfgs := []string{"a", "b", "c"} - for _, cfg := range cfgs { - err := remote.kv.CAS(context.Background(), cfg, func(in interface{}) (out interface{}, retry bool, err error) { - return fmt.Sprintf("name: %s", cfg), false, nil - }) - require.NoError(t, err) - } - - list, err := remote.List(context.Background()) - require.NoError(t, err) - sort.Strings(list) - require.Equal(t, cfgs, list) -} - -func TestRemote_Get(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - err = remote.kv.CAS(context.Background(), "someconfig", func(in interface{}) (out interface{}, retry bool, err error) { - return "name: someconfig", false, nil - }) - require.NoError(t, err) - - cfg, err := remote.Get(context.Background(), "someconfig") - require.NoError(t, err) - - expect := instance.DefaultConfig - expect.Name = "someconfig" - require.Equal(t, expect, cfg) -} - -func TestRemote_Put(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - cfg := instance.DefaultConfig - cfg.Name = "newconfig" - - created, err := remote.Put(context.Background(), cfg) - require.NoError(t, err) - require.True(t, created) - - actual, err := remote.Get(context.Background(), "newconfig") - require.NoError(t, err) - require.Equal(t, cfg, actual) - - t.Run("Updating", func(t *testing.T) { - cfg := instance.DefaultConfig - cfg.Name = "newconfig" - cfg.HostFilter = true - - created, err := remote.Put(context.Background(), cfg) - require.NoError(t, err) - require.False(t, created) - }) -} - -func TestRemote_Put_NonUnique(t *testing.T) { - var ( - conflictingA = util.Untab(` -name: conflicting-a -scrape_configs: -- job_name: foobar - `) - conflictingB = util.Untab(` -name: conflicting-b -scrape_configs: -- job_name: fizzbuzz -- job_name: foobar - `) - ) - - conflictingACfg, err := instance.UnmarshalConfig(strings.NewReader(conflictingA)) - require.NoError(t, err) - - conflictingBCfg, err := instance.UnmarshalConfig(strings.NewReader(conflictingB)) - require.NoError(t, err) - - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "configs/", - }, true) - 
require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - created, err := remote.Put(context.Background(), *conflictingACfg) - require.NoError(t, err) - require.True(t, created) - - _, err = remote.Put(context.Background(), *conflictingBCfg) - require.EqualError(t, err, fmt.Sprintf("failed to check uniqueness of config: found multiple scrape configs in config store with job name %q", "foobar")) -} - -func TestRemote_Delete(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - var cfg instance.Config - cfg.Name = "deleteme" - - created, err := remote.Put(context.Background(), cfg) - require.NoError(t, err) - require.True(t, created) - - err = remote.Delete(context.Background(), "deleteme") - require.NoError(t, err) - - _, err = remote.Get(context.Background(), "deleteme") - require.EqualError(t, err, "configuration deleteme does not exist") - - err = remote.Delete(context.Background(), "deleteme") - require.EqualError(t, err, "configuration deleteme does not exist") -} - -func TestRemote_All(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "all-configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - cfgs := []string{"a", "b", "c"} - for _, cfg := range cfgs { - err := remote.kv.CAS(context.Background(), cfg, func(in interface{}) (out interface{}, retry bool, err error) { - return fmt.Sprintf("name: %s", cfg), false, nil - }) - require.NoError(t, err) - } - - configCh, err := remote.All(context.Background(), nil) - require.NoError(t, err) - - var gotConfigs []string - for gotConfig := range configCh { - gotConfigs = append(gotConfigs, gotConfig.Name) - } - sort.Strings(gotConfigs) - - require.Equal(t, cfgs, gotConfigs) -} - -func TestRemote_Watch(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "watch-configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - _, err = remote.Put(context.Background(), instance.Config{Name: "watch"}) - require.NoError(t, err) - - select { - case cfg := <-remote.Watch(): - require.Equal(t, "watch", cfg.Key) - require.NotNil(t, cfg.Config) - require.Equal(t, "watch", cfg.Config.Name) - case <-time.After(3 * time.Second): - require.FailNow(t, "failed to watch for config") - } - - // Make sure Watch gets other updates. 
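The watch assertions in these tests pair a channel receive with time.After so a missed event fails after a deadline instead of hanging the test binary. The same guard in isolation (awaitEvent is an illustrative helper, not part of the package):

package main

import (
	"fmt"
	"time"
)

// awaitEvent returns the next event from ch, or an error if none
// arrives within the deadline.
func awaitEvent(ch <-chan string, timeout time.Duration) (string, error) {
	select {
	case ev := <-ch:
		return ev, nil
	case <-time.After(timeout):
		return "", fmt.Errorf("no event within %s", timeout)
	}
}

func main() {
	ch := make(chan string, 1)
	ch <- "watch"
	ev, err := awaitEvent(ch, 3*time.Second)
	fmt.Println(ev, err) // watch <nil>

	_, err = awaitEvent(ch, 100*time.Millisecond)
	fmt.Println(err) // no event within 100ms
}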
- _, err = remote.Put(context.Background(), instance.Config{Name: "watch2"}) - require.NoError(t, err) - - select { - case cfg := <-remote.Watch(): - require.Equal(t, "watch2", cfg.Key) - require.NotNil(t, cfg.Config) - require.Equal(t, "watch2", cfg.Config.Name) - case <-time.After(3 * time.Second): - require.FailNow(t, "failed to watch for config") - } -} - -func TestRemote_ApplyConfig(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "test-applyconfig/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - err = remote.ApplyConfig(kv.Config{ - Store: "inmemory", - Prefix: "test-applyconfig2/", - }, true) - require.NoError(t, err, "failed to apply a new config") - - err = remote.ApplyConfig(kv.Config{ - Store: "inmemory", - Prefix: "test-applyconfig2/", - }, true) - require.NoError(t, err, "failed to re-apply the current config") - - // Make sure watch still works - _, err = remote.Put(context.Background(), instance.Config{Name: "watch"}) - require.NoError(t, err) - - select { - case cfg := <-remote.Watch(): - require.Equal(t, "watch", cfg.Key) - require.NotNil(t, cfg.Config) - require.Equal(t, "watch", cfg.Config.Name) - case <-time.After(3 * time.Second): - require.FailNow(t, "failed to watch for config") - } -} diff --git a/internal/static/metrics/instance/configstore/store.go b/internal/static/metrics/instance/configstore/store.go deleted file mode 100644 index 799bafc882..0000000000 --- a/internal/static/metrics/instance/configstore/store.go +++ /dev/null @@ -1,49 +0,0 @@ -// Package configstore abstracts the concepts of where instance files get -// retrieved. -package configstore - -import ( - "context" - - "github.com/grafana/agent/internal/static/metrics/instance" -) - -// Store is some interface to retrieving instance configurations. -type Store interface { - // List gets the list of config names. - List(ctx context.Context) ([]string, error) - - // Get gets an individual config by name. - Get(ctx context.Context, key string) (instance.Config, error) - - // Put applies a new instance Config to the store. - // If the config already exists, created will be false to indicate an - // update. - Put(ctx context.Context, c instance.Config) (created bool, err error) - - // Delete deletes a config from the store. - Delete(ctx context.Context, key string) error - - // All retrieves the entire list of instance configs currently - // in the store. A filtering "keep" function can be provided to ignore some - // configs, which can significantly speed up the operation in some cases. - All(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) - - // Watch watches for changed instance Configs. - // All callers of Watch receive the same Channel. - // - // It is not guaranteed that Watch will emit all store events, and Watch - // should only be used for best-effort quick convergence with the remote - // store. Watch should always be paired with polling All. - Watch() <-chan WatchEvent - - // Close closes the store. - Close() error -} - -// WatchEvent is returned by Watch. The Key is the name of the config that was -// added, updated, or deleted. If the Config was deleted, Config will be nil. 
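The WatchEvent type defined below encodes deletions as a nil Config, so consumers must branch on that before dereferencing. A minimal consumer sketch (local Config/WatchEvent stand-ins, not the real types):

package main

import "fmt"

// Config and WatchEvent mirror the shapes below: a nil Config on an
// event signals that the key was deleted.
type Config struct{ Name string }

type WatchEvent struct {
	Key    string
	Config *Config
}

func handle(ev WatchEvent) {
	if ev.Config == nil {
		fmt.Println("deleted:", ev.Key)
		return
	}
	fmt.Println("upserted:", ev.Key)
}

func main() {
	handle(WatchEvent{Key: "a", Config: &Config{Name: "a"}}) // upserted: a
	handle(WatchEvent{Key: "a", Config: nil})                // deleted: a
}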
-type WatchEvent struct { - Key string - Config *instance.Config -} diff --git a/internal/static/metrics/instance/configstore/unique.go b/internal/static/metrics/instance/configstore/unique.go deleted file mode 100644 index 203b77ba45..0000000000 --- a/internal/static/metrics/instance/configstore/unique.go +++ /dev/null @@ -1,35 +0,0 @@ -package configstore - -import ( - "github.com/grafana/agent/internal/static/metrics/instance" -) - -// checkUnique validates that cfg is unique from all, ensuring that no two -// configs share a job_name. -func checkUnique(all <-chan instance.Config, cfg *instance.Config) error { - defer func() { - // Drain the channel, which is necessary if we're returning an error. - for range all { - } - }() - - newJobNames := make(map[string]struct{}, len(cfg.ScrapeConfigs)) - for _, sc := range cfg.ScrapeConfigs { - newJobNames[sc.JobName] = struct{}{} - } - - for otherConfig := range all { - // If the other config is the one we're validating, skip it. - if otherConfig.Name == cfg.Name { - continue - } - - for _, otherScrape := range otherConfig.ScrapeConfigs { - if _, exist := newJobNames[otherScrape.JobName]; exist { - return NotUniqueError{ScrapeJob: otherScrape.JobName} - } - } - } - - return nil -} diff --git a/internal/static/metrics/instance/errors.go b/internal/static/metrics/instance/errors.go deleted file mode 100644 index e025abf5bb..0000000000 --- a/internal/static/metrics/instance/errors.go +++ /dev/null @@ -1,44 +0,0 @@ -package instance - -import "fmt" - -// ErrInvalidUpdate is returned whenever Update is called against an instance -// but an invalid field is changed between configs. If ErrInvalidUpdate is -// returned, the instance must be fully stopped and replaced with a new one -// with the new config. -type ErrInvalidUpdate struct { - Inner error -} - -// Error implements the error interface. -func (e ErrInvalidUpdate) Error() string { return e.Inner.Error() } - -// Is returns true if err is an ErrInvalidUpdate. -func (e ErrInvalidUpdate) Is(err error) bool { - switch err.(type) { - case ErrInvalidUpdate, *ErrInvalidUpdate: - return true - default: - return false - } -} - -// As will set the err object to ErrInvalidUpdate provided err -// is a pointer to ErrInvalidUpdate. -func (e ErrInvalidUpdate) As(err interface{}) bool { - switch v := err.(type) { - case *ErrInvalidUpdate: - *v = e - default: - return false - } - return true -} - -// errImmutableField is the error describing a field that cannot be changed. It -// is wrapped inside of a ErrInvalidUpdate. -type errImmutableField struct{ Field string } - -func (e errImmutableField) Error() string { - return fmt.Sprintf("%s cannot be changed dynamically", e.Field) -} diff --git a/internal/static/metrics/instance/group_manager.go b/internal/static/metrics/instance/group_manager.go deleted file mode 100644 index 072675d92f..0000000000 --- a/internal/static/metrics/instance/group_manager.go +++ /dev/null @@ -1,358 +0,0 @@ -package instance - -import ( - "crypto/md5" - "encoding/hex" - "fmt" - "sort" - "sync" - - "github.com/prometheus/prometheus/config" -) - -// A GroupManager wraps around another Manager and groups all incoming Configs -// into a smaller set of configs, causing less managed instances to be spawned. -// -// Configs are grouped by all settings for a Config *except* scrape configs. -// Any difference found in any flag will cause a Config to be placed in another -// group. 
One exception to this rule is that remote_writes are compared -// unordered, but the sets of remote_writes should otherwise be identical. -// -// GroupManagers drastically improve the performance of the Agent when a -// significant number of instances are spawned, as the overhead of each -// instance having its own service discovery, WAL, and remote_write can be -// significant. -// -// The config names of instances within the group will be represented by -// that group's hash of settings. -type GroupManager struct { - inner Manager - - mtx sync.Mutex - - // groups is a map of group name to the grouped configs. - groups map[string]groupedConfigs - - // groupLookup is a map of config name to group name. - groupLookup map[string]string -} - -// groupedConfigs holds a set of grouped configs, keyed by the config name. -// They are stored in a map rather than a slice to make overriding an existing -// config within the group less error-prone. -type groupedConfigs map[string]Config - -// Copy returns a shallow copy of the groupedConfigs. -func (g groupedConfigs) Copy() groupedConfigs { - res := make(groupedConfigs, len(g)) - for k, v := range g { - res[k] = v - } - return res -} - -// NewGroupManager creates a new GroupManager for combining instances of the -// same "group." -func NewGroupManager(inner Manager) *GroupManager { - return &GroupManager{ - inner: inner, - groups: make(map[string]groupedConfigs), - groupLookup: make(map[string]string), - } -} - -// GetInstance gets the underlying grouped instance for a given name. -func (m *GroupManager) GetInstance(name string) (ManagedInstance, error) { - m.mtx.Lock() - defer m.mtx.Unlock() - - group, ok := m.groupLookup[name] - if !ok { - return nil, fmt.Errorf("instance %s does not exist", name) - } - - inst, err := m.inner.GetInstance(group) - if err != nil { - return nil, fmt.Errorf("failed to get instance for %s: %w", name, err) - } - return inst, nil -} - -// ListInstances returns all currently grouped managed instances. The key -// will be the group's hash of shared settings. -func (m *GroupManager) ListInstances() map[string]ManagedInstance { - return m.inner.ListInstances() -} - -// ListConfigs returns the UNGROUPED instance configs with their original -// settings. To see the grouped instances, call ListInstances instead. -func (m *GroupManager) ListConfigs() map[string]Config { - m.mtx.Lock() - defer m.mtx.Unlock() - - cfgs := make(map[string]Config) - for _, groupedConfigs := range m.groups { - for _, cfg := range groupedConfigs { - cfgs[cfg.Name] = cfg - } - } - return cfgs -} - -// ApplyConfig will determine the group of the Config before applying it to -// the group. If no group exists, one will be created. If a group already -// exists, the group will have its settings merged with the Config and -// will be updated. -func (m *GroupManager) ApplyConfig(c Config) error { - m.mtx.Lock() - defer m.mtx.Unlock() - return m.applyConfig(c) -} - -func (m *GroupManager) applyConfig(c Config) (err error) { - groupName, err := hashConfig(c) - if err != nil { - return fmt.Errorf("failed to get group name for config %s: %w", c.Name, err) - } - - grouped := m.groups[groupName] - if grouped == nil { - grouped = make(groupedConfigs) - } else { - grouped = grouped.Copy() - } - - // Add the config to the group. If the config already exists within this - // group, it'll be overwritten. 
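applyConfig mutates a copy of the group and only publishes it back to the shared map after the inner apply succeeds, so a failure cannot leave a half-updated group visible. The copy-then-publish shape in isolation (names are illustrative):

package main

import "fmt"

type group map[string]string

// copyGroup returns a shallow copy; callers mutate the copy and only
// publish it after the downstream apply succeeds, so failures never
// expose a half-updated group.
func copyGroup(g group) group {
	out := make(group, len(g))
	for k, v := range g {
		out[k] = v
	}
	return out
}

// applyInner stands in for the inner Manager's ApplyConfig.
func applyInner(group) error { return nil }

func main() {
	published := group{"configA": "v1"}

	staged := copyGroup(published)
	staged["configB"] = "v1"

	if err := applyInner(staged); err != nil {
		fmt.Println("apply failed; published group untouched:", published)
		return
	}
	published = staged
	fmt.Println("published:", published)
}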
- grouped[c.Name] = c - mergedConfig, err := groupConfigs(groupName, grouped) - if err != nil { - err = fmt.Errorf("failed to group configs for %s: %w", c.Name, err) - return - } - - // If this config already exists in another group, we have to delete it. - // If we can't delete it from the old group, we also can't apply it. - if oldGroup, ok := m.groupLookup[c.Name]; ok && oldGroup != groupName { - // There's a few cases here where if something fails, it's safer to crash - // out and restart the Agent from scratch than it would be to continue as - // normal. The panics here are for truly exceptional cases, otherwise if - // something is recoverable, we'll return an error like normal. - - // If we can't find the old config, something got messed up when applying - // the config. But it also means that we're not going to be able to restore - // the config if something fails. Preemptively we should panic, since the - // internal state has gotten messed up and can't be fixed. - oldConfig, ok := m.groups[oldGroup][c.Name] - if !ok { - panic("failed to properly move config to new group. THIS IS A BUG!") - } - - err = m.deleteConfig(c.Name) - if err != nil { - err = fmt.Errorf("cannot apply config %s because deleting it from the old group failed: %w", c.Name, err) - return - } - - // Now that the config is deleted, we need to restore it in case applying - // the new one happens to fail. - defer func() { - if err == nil { - return - } - - // If restoring a config fails, we've left the Agent in a really bad - // state: the new config can't be applied and the old config can't be - // brought back. Just crash and let the Agent start fresh. - // - // Restoring the config _shouldn't_ fail here since applies only fail - // if the config is invalid. Since the config was running before, it - // should already be valid. If it does happen to fail, though, the - // internal state is left corrupted since we've completely lost a - // config. - restoreError := m.applyConfig(oldConfig) - if restoreError != nil { - panic(fmt.Sprintf("failed to properly restore config. THIS IS A BUG! error: %s", restoreError)) - } - }() - } - - err = m.inner.ApplyConfig(mergedConfig) - if err != nil { - err = fmt.Errorf("failed to apply grouped configs for config %s: %w", c.Name, err) - return - } - - // If the inner apply succeeded, we can update our group and the lookup. - m.groups[groupName] = grouped - m.groupLookup[c.Name] = groupName - return -} - -// DeleteConfig will remove a Config from its associated group. If there are -// no more Configs within that group after this Config is deleted, the managed -// instance will be stopped. Otherwise, the managed instance will be updated -// with the new grouped Config that doesn't include the removed one. -func (m *GroupManager) DeleteConfig(name string) error { - m.mtx.Lock() - defer m.mtx.Unlock() - return m.deleteConfig(name) -} - -func (m *GroupManager) deleteConfig(name string) error { - groupName, ok := m.groupLookup[name] - if !ok { - return fmt.Errorf("config does not exist") - } - - // Grab a copy of the stored group and delete our entry. We can - // persist it after we successfully remove the config. - group := m.groups[groupName].Copy() - delete(group, name) - - if len(group) == 0 { - // We deleted the last remaining config in that group; we can delete it in - // its entirety now. 
- if err := m.inner.DeleteConfig(groupName); err != nil { - return fmt.Errorf("failed to delete empty group %s after removing config %s: %w", groupName, name, err) - } - } else { - // We deleted the config but there's still more in the group; apply the new - // group that holds the remainder of the configs (minus the one we just - // deleted). - mergedConfig, err := groupConfigs(groupName, group) - if err != nil { - return fmt.Errorf("failed to regroup configs without %s: %w", name, err) - } - - err = m.inner.ApplyConfig(mergedConfig) - if err != nil { - return fmt.Errorf("failed to apply new group without %s: %w", name, err) - } - } - - // Update the stored group and remove the entry from the lookup table. - if len(group) == 0 { - delete(m.groups, groupName) - } else { - m.groups[groupName] = group - } - - delete(m.groupLookup, name) - return nil -} - -// Stop stops the Manager and all of its managed instances. -func (m *GroupManager) Stop() { - m.mtx.Lock() - defer m.mtx.Unlock() - - m.inner.Stop() - m.groupLookup = make(map[string]string) - m.groups = make(map[string]groupedConfigs) -} - -// hashConfig determines the hash of a Config used for grouping. It ignores -// the name and scrape_configs and also orders remote_writes by name prior to -// hashing. -func hashConfig(c Config) (string, error) { - // We need a deep copy since we're going to mutate the remote_write - // pointers. - groupable, err := c.Clone() - if err != nil { - return "", err - } - - // Ignore name and scrape configs when hashing - groupable.Name = "" - groupable.ScrapeConfigs = nil - - // Assign names to remote_write configs if they're not present already. - // This is also done in AssignDefaults but is duplicated here for the sake - // of simplifying responsibility of GroupManager. - for _, cfg := range groupable.RemoteWrite { - if cfg != nil { - // We don't care if the names are different, just that the other settings - // are the same. Blank out the name here before hashing the remote - // write config. - cfg.Name = "" - - hash, err := getHash(cfg) - if err != nil { - return "", err - } - cfg.Name = hash[:6] - } - } - - // Now sort remote_writes by name and nil-ness. - sort.Slice(groupable.RemoteWrite, func(i, j int) bool { - switch { - case groupable.RemoteWrite[i] == nil: - return true - case groupable.RemoteWrite[j] == nil: - return false - default: - return groupable.RemoteWrite[i].Name < groupable.RemoteWrite[j].Name - } - }) - - bb, err := MarshalConfig(&groupable, false) - if err != nil { - return "", err - } - hash := md5.Sum(bb) - return hex.EncodeToString(hash[:]), nil -} - -// groupConfig creates a grouped Config where all fields are copied from -// the first config except for scrape_configs, which are appended together. -func groupConfigs(groupName string, grouped groupedConfigs) (Config, error) { - if len(grouped) == 0 { - return Config{}, fmt.Errorf("no configs") - } - - // Move the map into a slice and sort it by name so this function - // consistently does the same thing. - cfgs := make([]Config, 0, len(grouped)) - for _, cfg := range grouped { - cfgs = append(cfgs, cfg) - } - sort.Slice(cfgs, func(i, j int) bool { return cfgs[i].Name < cfgs[j].Name }) - - combined, err := cfgs[0].Clone() - if err != nil { - return Config{}, err - } - combined.Name = groupName - combined.ScrapeConfigs = []*config.ScrapeConfig{} - - // Assign all remote_write configs in the group a consistent set of remote_names. 
- // If the grouped configs are coming from the scraping service, defaults will have - // been applied and the remote names will be prefixed with the old instance config name. - for _, rwc := range combined.RemoteWrite { - // Blank out the existing name before getting the hash so it doesn't take into - // account any existing name. - rwc.Name = "" - - hash, err := getHash(rwc) - if err != nil { - return Config{}, err - } - - rwc.Name = groupName[:6] + "-" + hash[:6] - } - - // Combine all the scrape configs. It's possible that two different ungrouped - // configs had a matching job name, but this will be detected and rejected - // (as it should be) when the underlying Manager eventually validates the - // combined config. - // - // TODO(rfratto): should we prepend job names with the name of the original - // config? (e.g., job_name = "config_name/job_name"). - for _, cfg := range cfgs { - combined.ScrapeConfigs = append(combined.ScrapeConfigs, cfg.ScrapeConfigs...) - } - - return combined, nil -} diff --git a/internal/static/metrics/instance/group_manager_test.go b/internal/static/metrics/instance/group_manager_test.go deleted file mode 100644 index 48b87236f4..0000000000 --- a/internal/static/metrics/instance/group_manager_test.go +++ /dev/null @@ -1,446 +0,0 @@ -package instance - -import ( - "fmt" - "strings" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestGroupManager_ListInstances_Configs(t *testing.T) { - gm := NewGroupManager(newFakeManager()) - - // Create two configs in the same group and one in another - // group. - configs := []string{ - ` -name: configA -scrape_configs: [] -remote_write: []`, - ` -name: configB -scrape_configs: [] -remote_write: []`, - ` -name: configC -scrape_configs: [] -remote_write: -- url: http://localhost:9090`, - } - - for _, cfg := range configs { - c := testUnmarshalConfig(t, cfg) - err := gm.ApplyConfig(c) - require.NoError(t, err) - } - - // ListInstances should return our grouped instances - insts := gm.ListInstances() - require.Equal(t, 2, len(insts)) - - // ...but ListConfigs should return the ungrouped configs. - confs := gm.ListConfigs() - require.Equal(t, 3, len(confs)) - require.Containsf(t, confs, "configA", "configA not in confs") - require.Containsf(t, confs, "configB", "configB not in confs") - require.Containsf(t, confs, "configC", "configC not in confs") -} - -func testUnmarshalConfig(t *testing.T, cfg string) Config { - c, err := UnmarshalConfig(strings.NewReader(cfg)) - require.NoError(t, err) - return *c -} - -func TestGroupManager_ApplyConfig(t *testing.T) { - t.Run("combining configs", func(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: [] -remote_write: [] -`)) - require.NoError(t, err) - - err = gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configB -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`)) - require.NoError(t, err) - - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 2, len(gm.groupLookup)) - - // Check the underlying grouped config and make sure it was updated. 
- expect := testUnmarshalConfig(t, fmt.Sprintf(` -name: %s -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`, gm.groupLookup["configA"])) - - innerConfigs := inner.ListConfigs() - require.Equal(t, 1, len(innerConfigs)) - require.Equal(t, expect, innerConfigs[gm.groupLookup["configA"]]) - }) - - t.Run("updating existing config within group", func(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: [] -remote_write: [] -`)) - require.NoError(t, err) - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - - err = gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`)) - require.NoError(t, err) - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - - // Check the underlying grouped config and make sure it was updated. - expect := testUnmarshalConfig(t, fmt.Sprintf(` -name: %s -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`, gm.groupLookup["configA"])) - actual := inner.ListConfigs()[gm.groupLookup["configA"]] - require.Equal(t, expect, actual) - }) - - t.Run("updating existing config to new group", func(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: [] -remote_write: [] -`)) - require.NoError(t, err) - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - oldGroup := gm.groupLookup["configA"] - - // Reapply the config but give it a setting change that would - // force it into a new group. We should still have only one - // group and only one entry in the group lookup table. - err = gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -host_filter: true -scrape_configs: [] -remote_write: [] -`)) - require.NoError(t, err) - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - newGroup := gm.groupLookup["configA"] - - // Check the underlying grouped config and make sure it was updated. - expect := testUnmarshalConfig(t, fmt.Sprintf(` -name: %s -host_filter: true -scrape_configs: [] -remote_write: [] -`, gm.groupLookup["configA"])) - actual := inner.ListConfigs()[newGroup] - require.Equal(t, expect, actual) - - // The old underlying ngroup should be gone. - require.NotContains(t, inner.ListConfigs(), oldGroup) - require.Equal(t, 1, len(inner.ListConfigs())) - }) -} - -func TestGroupManager_ApplyConfig_RemoteWriteName(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: [] -remote_write: -- name: rw-cfg-a - url: http://localhost:9009/api/prom/push -`)) - require.NoError(t, err) - - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - - // Check the underlying grouped config and make sure the group_name - // didn't get copied from the remote_name of A. 
- innerConfigs := inner.ListConfigs() - require.Equal(t, 1, len(innerConfigs)) - - cfg := innerConfigs[gm.groupLookup["configA"]] - require.NotEqual(t, "rw-cfg-a", cfg.RemoteWrite[0].Name) -} - -func TestGroupManager_DeleteConfig(t *testing.T) { - t.Run("partial delete", func(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - - // Apply two configs in the same group and then delete one. The group - // should still be active with the one config inside of it. - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`)) - require.NoError(t, err) - - err = gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configB -scrape_configs: -- job_name: test_job2 - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`)) - require.NoError(t, err) - - err = gm.DeleteConfig("configA") - require.NoError(t, err) - - expect := testUnmarshalConfig(t, fmt.Sprintf(` -name: %s -scrape_configs: -- job_name: test_job2 - static_configs: - - targets: [127.0.0.1:12345] -remote_write: []`, gm.groupLookup["configB"])) - actual := inner.ListConfigs()[gm.groupLookup["configB"]] - require.Equal(t, expect, actual) - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - }) - - t.Run("full delete", func(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - - // Apply a single config but delete the entire group. - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`)) - require.NoError(t, err) - - err = gm.DeleteConfig("configA") - require.NoError(t, err) - require.Equal(t, 0, len(inner.ListConfigs())) - require.Equal(t, 0, len(inner.ListInstances())) - require.Equal(t, 0, len(gm.groups)) - require.Equal(t, 0, len(gm.groupLookup)) - }) -} - -func newFakeManager() Manager { - instances := make(map[string]ManagedInstance) - configs := make(map[string]Config) - - return &MockManager{ - ListInstancesFunc: func() map[string]ManagedInstance { - return instances - }, - ListConfigsFunc: func() map[string]Config { - return configs - }, - ApplyConfigFunc: func(c Config) error { - instances[c.Name] = &mockInstance{} - configs[c.Name] = c - return nil - }, - DeleteConfigFunc: func(name string) error { - delete(instances, name) - delete(configs, name) - return nil - }, - StopFunc: func() {}, - } -} - -func Test_hashConfig(t *testing.T) { - t.Run("name and scrape configs are ignored", func(t *testing.T) { - configAText := ` -name: configA -scrape_configs: [] -remote_write: []` - - configBText := ` -name: configB -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: []` - - hashA, hashB := getHashesFromConfigs(t, configAText, configBText) - require.Equal(t, hashA, hashB) - }) - - t.Run("remote_writes are unordered", func(t *testing.T) { - configAText := ` -name: configA -scrape_configs: [] -remote_write: -- url: http://localhost:9009/api/prom/push1 -- url: http://localhost:9009/api/prom/push2` - - configBText := ` -name: configB -scrape_configs: [] -remote_write: -- url: http://localhost:9009/api/prom/push2 -- url: http://localhost:9009/api/prom/push1` - - hashA, hashB := getHashesFromConfigs(t, configAText, configBText) - require.Equal(t, hashA, hashB) - }) - - t.Run("remote_writes must match", func(t *testing.T) { - configAText := ` -name: configA -scrape_configs: [] 
-remote_write: -- url: http://localhost:9009/api/prom/push1 -- url: http://localhost:9009/api/prom/push2` - - configBText := ` -name: configB -scrape_configs: [] -remote_write: -- url: http://localhost:9009/api/prom/push1 -- url: http://localhost:9009/api/prom/push1` - - hashA, hashB := getHashesFromConfigs(t, configAText, configBText) - require.NotEqual(t, hashA, hashB) - }) - - t.Run("other fields must match", func(t *testing.T) { - configAText := ` -name: configA -host_filter: true -scrape_configs: [] -remote_write: []` - - configBText := ` -name: configB -host_filter: false -scrape_configs: [] -remote_write: []` - - hashA, hashB := getHashesFromConfigs(t, configAText, configBText) - require.NotEqual(t, hashA, hashB) - }) -} - -func getHashesFromConfigs(t *testing.T, configAText, configBText string) (string, string) { - configA := testUnmarshalConfig(t, configAText) - configB := testUnmarshalConfig(t, configBText) - - hashA, err := hashConfig(configA) - require.NoError(t, err) - - hashB, err := hashConfig(configB) - require.NoError(t, err) - - return hashA, hashB -} - -func Test_groupConfigs(t *testing.T) { - configAText := ` -name: configA -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: -- url: http://localhost:9009/api/prom/push1 -- url: http://localhost:9009/api/prom/push2` - - configBText := ` -name: configB -scrape_configs: -- job_name: test_job2 - static_configs: - - targets: [127.0.0.1:12345] -remote_write: -- url: http://localhost:9009/api/prom/push2 -- url: http://localhost:9009/api/prom/push1` - - configA := testUnmarshalConfig(t, configAText) - configB := testUnmarshalConfig(t, configBText) - - groupName, err := hashConfig(configA) - require.NoError(t, err) - - expectText := fmt.Sprintf(` -name: %s -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -- job_name: test_job2 - static_configs: - - targets: [127.0.0.1:12345] -remote_write: -- url: http://localhost:9009/api/prom/push1 -- url: http://localhost:9009/api/prom/push2`, groupName) - - expect, err := UnmarshalConfig(strings.NewReader(expectText)) - require.NoError(t, err) - - // Generate expected remote_write names - for _, rwConfig := range expect.RemoteWrite { - hash, err := getHash(rwConfig) - require.NoError(t, err) - rwConfig.Name = groupName[:6] + "-" + hash[:6] - } - - group := groupedConfigs{ - "configA": configA, - "configB": configB, - } - actual, err := groupConfigs(groupName, group) - require.NoError(t, err) - require.Equal(t, *expect, actual) - - // Consistency check: groupedConfigs is a map and we want to always have - // groupConfigs return the same thing regardless of how the map - // is iterated over. Run through groupConfigs a bunch of times and - // make sure it always returns the same thing. 
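The 100-iteration loop below exists because Go randomizes map iteration order; groupConfigs defends against that by snapshotting the map into a slice and sorting before combining. The same defense in a self-contained sketch:

package main

import (
	"fmt"
	"sort"
	"strings"
)

// combine flattens a map deterministically: snapshot into a slice, sort
// by key, then concatenate. Without the sort, Go's randomized map
// iteration would make the output differ from run to run.
func combine(m map[string]string) string {
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	parts := make([]string, 0, len(keys))
	for _, k := range keys {
		parts = append(parts, m[k])
	}
	return strings.Join(parts, ",")
}

func main() {
	m := map[string]string{"configB": "job2", "configA": "job1"}
	for i := 0; i < 3; i++ {
		fmt.Println(combine(m)) // always "job1,job2"
	}
}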
- for i := 0; i < 100; i++ { - actual, err = groupConfigs(groupName, group) - require.NoError(t, err) - require.Equal(t, *expect, actual) - } -} diff --git a/internal/static/metrics/instance/host_filter.go b/internal/static/metrics/instance/host_filter.go deleted file mode 100644 index 2328f6feff..0000000000 --- a/internal/static/metrics/instance/host_filter.go +++ /dev/null @@ -1,238 +0,0 @@ -package instance - -import ( - "context" - "fmt" - "net" - "sync" - - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery/kubernetes" - "github.com/prometheus/prometheus/discovery/targetgroup" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/relabel" -) - -// HostFilterLabelMatchers are the set of labels that will be used to match -// against an incoming target. -var HostFilterLabelMatchers = []string{ - // Consul - "__meta_consul_node", - - // Dockerswarm - "__meta_dockerswarm_node_id", - "__meta_dockerswarm_node_hostname", - "__meta_dockerswarm_node_address", - - // Kubernetes node labels. Labels for `role: service` are omitted as - // service targets have labels merged with discovered pods. - "__meta_kubernetes_pod_node_name", - "__meta_kubernetes_node_name", - - // Generic (applied by host_filter_relabel_configs) - "__host__", -} - -// DiscoveredGroups is a set of groups found via service discovery. -type DiscoveredGroups = map[string][]*targetgroup.Group - -// GroupChannel is a channel that provides discovered target groups. -type GroupChannel = <-chan DiscoveredGroups - -// HostFilter acts as a MITM between the discovery manager and the -// scrape manager, filtering out discovered targets that are not -// running on the same node as the agent itself. -type HostFilter struct { - ctx context.Context - cancel context.CancelFunc - - host string - - inputCh GroupChannel - outputCh chan map[string][]*targetgroup.Group - - relabelMut sync.Mutex - relabels []*relabel.Config -} - -// NewHostFilter creates a new HostFilter. -func NewHostFilter(host string, relabels []*relabel.Config) *HostFilter { - ctx, cancel := context.WithCancel(context.Background()) - f := &HostFilter{ - ctx: ctx, - cancel: cancel, - - host: host, - relabels: relabels, - - outputCh: make(chan map[string][]*targetgroup.Group), - } - return f -} - -// PatchSD patches services discoveries to optimize performance for host -// filtering. The discovered targets will be pruned to as close to the set -// that HostFilter will output as possible. -func (f *HostFilter) PatchSD(scrapes []*config.ScrapeConfig) { - for _, sc := range scrapes { - for _, d := range sc.ServiceDiscoveryConfigs { - switch d := d.(type) { - case *kubernetes.SDConfig: - if d.Role == kubernetes.RolePod { - d.Selectors = []kubernetes.SelectorConfig{{ - Role: kubernetes.RolePod, - Field: fmt.Sprintf("spec.nodeName=%s", f.host), - }} - } - } - } - } -} - -// SetRelabels updates the relabeling rules used by the HostFilter. -func (f *HostFilter) SetRelabels(relabels []*relabel.Config) { - f.relabelMut.Lock() - defer f.relabelMut.Unlock() - f.relabels = relabels -} - -// Run starts the HostFilter. It only exits when the HostFilter is stopped. -// Run will continually read from syncCh and filter groups discovered down to -// targets that are colocated on the same node as the one the HostFilter is -// running in. 
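Run, defined below, is a standard pipeline stage: receive discovered groups from the input channel, filter them, forward the survivors on the output channel, and exit on context cancellation. A stripped-down sketch of that shape (filterStage and the string targets are illustrative stand-ins for target groups):

package main

import (
	"context"
	"fmt"
	"strings"
)

// filterStage forwards only items that pass keep, stopping when ctx is
// cancelled or the input channel closes.
func filterStage(ctx context.Context, in <-chan string, keep func(string) bool) <-chan string {
	out := make(chan string)
	go func() {
		defer close(out)
		for {
			select {
			case <-ctx.Done():
				return
			case v, ok := <-in:
				if !ok {
					return
				}
				if keep(v) {
					out <- v
				}
			}
		}
	}()
	return out
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	in := make(chan string, 3)
	in <- "myhost:9090"
	in <- "otherhost:9090"
	in <- "myhost:9100"
	close(in)

	keep := func(t string) bool { return strings.HasPrefix(t, "myhost") }
	for v := range filterStage(ctx, in, keep) {
		fmt.Println(v) // myhost:9090, then myhost:9100
	}
}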
-func (f *HostFilter) Run(syncCh GroupChannel) { - f.inputCh = syncCh - - for { - select { - case <-f.ctx.Done(): - return - case data := <-f.inputCh: - f.relabelMut.Lock() - relabels := f.relabels - f.relabelMut.Unlock() - - f.outputCh <- FilterGroups(data, f.host, relabels) - } - } -} - -// Stop stops the host filter from processing more target updates. -func (f *HostFilter) Stop() { - f.cancel() -} - -// SyncCh returns a read only channel used by all the clients to receive -// target updates. -func (f *HostFilter) SyncCh() GroupChannel { - return f.outputCh -} - -// FilterGroups takes a set of DiscoveredGroups as input and filters out -// any Target that is not running on the host machine provided by host. -// -// This is done by looking at HostFilterLabelMatchers and __address__. -// -// If the discovered address is localhost or 127.0.0.1, the group is never -// filtered out. -func FilterGroups(in DiscoveredGroups, host string, configs []*relabel.Config) DiscoveredGroups { - out := make(DiscoveredGroups, len(in)) - - for name, groups := range in { - groupList := make([]*targetgroup.Group, 0, len(groups)) - - for _, group := range groups { - newGroup := &targetgroup.Group{ - Targets: make([]model.LabelSet, 0, len(group.Targets)), - Labels: group.Labels, - Source: group.Source, - } - - for _, target := range group.Targets { - allLabels := mergeSets(target, group.Labels) - processedLabels, _ := relabel.Process(toLabelSlice(allLabels), configs...) - - if !shouldFilterTarget(processedLabels, host) { - newGroup.Targets = append(newGroup.Targets, target) - } - } - - groupList = append(groupList, newGroup) - } - - out[name] = groupList - } - - return out -} - -// shouldFilterTarget returns true when the target labels (combined with the set of common -// labels) should be filtered out by FilterGroups. -func shouldFilterTarget(lbls labels.Labels, host string) bool { - shouldFilterTargetByLabelValue := func(labelValue string) bool { - if addr, _, err := net.SplitHostPort(labelValue); err == nil { - labelValue = addr - } - - // Special case: always allow localhost/127.0.0.1 - if labelValue == "localhost" || labelValue == "127.0.0.1" { - return false - } - - return labelValue != host - } - - lset := labels.New(lbls...) - addressLabel := lset.Get(model.AddressLabel) - if addressLabel == "" { - // No address label. This is invalid and will generate an error by the scrape - // manager, so we'll pass it on for now. - return false - } - - // If the __address__ label matches, we can quit early. - if !shouldFilterTargetByLabelValue(addressLabel) { - return false - } - - // Fall back to checking metalabels as long as their values are nonempty. - for _, check := range HostFilterLabelMatchers { - // If any of the checked labels match for not being filtered out, we can - // return before checking any of the other matchers. - if addr := lset.Get(check); addr != "" && !shouldFilterTargetByLabelValue(addr) { - return false - } - } - - // Nothing matches, filter it out. - return true -} - -// mergeSets merges the sets of labels together. Earlier sets take priority for label names. 
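The filtering decision above boils down to one host comparison per candidate label value. That comparison in isolation, stdlib only (matchesHost is an illustrative name for the inverse of the inlined shouldFilterTargetByLabelValue):

	package main

	import (
		"fmt"
		"net"
	)

	// matchesHost reports whether a discovered label value refers to host.
	// A port suffix is stripped when present, and localhost is always accepted.
	func matchesHost(labelValue, host string) bool {
		if addr, _, err := net.SplitHostPort(labelValue); err == nil {
			labelValue = addr
		}
		if labelValue == "localhost" || labelValue == "127.0.0.1" {
			return true
		}
		return labelValue == host
	}

	func main() {
		fmt.Println(matchesHost("myhost:9090", "myhost"))  // true
		fmt.Println(matchesHost("otherhost", "myhost"))    // false
		fmt.Println(matchesHost("127.0.0.1:80", "myhost")) // true (always allowed)
	}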
-func mergeSets(sets ...model.LabelSet) model.LabelSet { - sz := 0 - for _, set := range sets { - sz += len(set) - } - result := make(model.LabelSet, sz) - - for _, set := range sets { - for labelName, labelValue := range set { - if _, exist := result[labelName]; exist { - continue - } - result[labelName] = labelValue - } - } - - return result -} - -func toLabelSlice(set model.LabelSet) labels.Labels { - slice := make(labels.Labels, 0, len(set)) - for name, value := range set { - slice = append(slice, labels.Label{Name: string(name), Value: string(value)}) - } - return slice -} diff --git a/internal/static/metrics/instance/host_filter_test.go b/internal/static/metrics/instance/host_filter_test.go deleted file mode 100644 index 8eca3a3f51..0000000000 --- a/internal/static/metrics/instance/host_filter_test.go +++ /dev/null @@ -1,201 +0,0 @@ -package instance - -import ( - "testing" - - "github.com/grafana/agent/internal/util" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery/targetgroup" - "github.com/prometheus/prometheus/model/relabel" - "github.com/stretchr/testify/require" - "gopkg.in/yaml.v3" -) - -func makeGroup(labels []model.LabelSet) *targetgroup.Group { - return &targetgroup.Group{ - Targets: labels, - Labels: model.LabelSet{}, - } -} - -func TestFilterGroups(t *testing.T) { - tt := []struct { - name string - labelHost string - inputHost string - shouldRemove bool - }{ - { - name: "complete match", - labelHost: "myhost", - inputHost: "myhost", - shouldRemove: false, - }, - { - name: "mismatch", - labelHost: "notmyhost", - inputHost: "myhost", - shouldRemove: true, - }, - { - name: "match with port", - labelHost: "myhost:12345", - inputHost: "myhost", - shouldRemove: false, - }, - { - name: "mismatch with port", - labelHost: "notmyhost:12345", - inputHost: "myhost", - shouldRemove: true, - }, - } - - // Sets of labels we want to test against. - labels := []model.LabelName{ - model.AddressLabel, - model.LabelName("__meta_consul_node"), - model.LabelName("__meta_dockerswarm_node_id"), - model.LabelName("__meta_dockerswarm_node_hostname"), - model.LabelName("__meta_dockerswarm_node_address"), - model.LabelName("__meta_kubernetes_pod_node_name"), - model.LabelName("__meta_kubernetes_node_name"), - model.LabelName("__host__"), - } - - for _, tc := range tt { - t.Run(tc.name, func(t *testing.T) { - for _, label := range labels { - t.Run(string(label), func(t *testing.T) { - lset := model.LabelSet{ - label: model.LabelValue(tc.labelHost), - } - - // Special case: if label is not model.AddressLabel, we need to give - // it a fake value. model.AddressLabel is always expected to be present and - // is considered an error if it isn't. 
- if label != model.AddressLabel { - lset[model.AddressLabel] = "fake" - } - - group := makeGroup([]model.LabelSet{lset}) - - groups := DiscoveredGroups{"test": []*targetgroup.Group{group}} - result := FilterGroups(groups, tc.inputHost, nil) - - require.NotNil(t, result["test"]) - if tc.shouldRemove { - require.NotEqual(t, len(result["test"][0].Targets), len(groups["test"][0].Targets)) - } else { - require.Equal(t, len(result["test"][0].Targets), len(groups["test"][0].Targets)) - } - }) - } - }) - } -} - -func TestFilterGroups_Relabel(t *testing.T) { - tt := []struct { - name string - labelHost string - inputHost string - shouldRemove bool - }{ - { - name: "complete match", - labelHost: "myhost", - inputHost: "myhost", - shouldRemove: false, - }, - { - name: "mismatch", - labelHost: "notmyhost", - inputHost: "myhost", - shouldRemove: true, - }, - { - name: "match with port", - labelHost: "myhost:12345", - inputHost: "myhost", - shouldRemove: false, - }, - { - name: "mismatch with port", - labelHost: "notmyhost:12345", - inputHost: "myhost", - shouldRemove: true, - }, - } - - relabelConfig := []*relabel.Config{{ - SourceLabels: model.LabelNames{"__internal_label"}, - Action: relabel.Replace, - Separator: ";", - Regex: relabel.MustNewRegexp("(.*)"), - Replacement: "$1", - TargetLabel: "__host__", - }} - - for _, tc := range tt { - t.Run(tc.name, func(t *testing.T) { - lset := model.LabelSet{ - model.AddressLabel: "fake_target", - "__internal_label": model.LabelValue(tc.labelHost), - } - - group := makeGroup([]model.LabelSet{lset}) - - groups := DiscoveredGroups{"test": []*targetgroup.Group{group}} - result := FilterGroups(groups, tc.inputHost, relabelConfig) - - require.NotNil(t, result["test"]) - if tc.shouldRemove { - require.NotEqual(t, len(result["test"][0].Targets), len(groups["test"][0].Targets)) - } else { - require.Equal(t, len(result["test"][0].Targets), len(groups["test"][0].Targets)) - } - }) - } -} - -func TestHostFilter_PatchSD(t *testing.T) { - rawInput := util.Untab(` -- job_name: default - kubernetes_sd_configs: - - role: service - - role: pod`) - - expect := util.Untab(` -- job_name: default - honor_timestamps: true - metrics_path: /metrics - scheme: http - track_timestamps_staleness: false - follow_redirects: true - enable_http2: true - kubernetes_sd_configs: - - role: service - kubeconfig_file: "" - follow_redirects: true - enable_http2: true - - role: pod - follow_redirects: true - enable_http2: true - kubeconfig_file: "" - selectors: - - role: pod - field: spec.nodeName=myhost - `) - - var input []*config.ScrapeConfig - err := yaml.Unmarshal([]byte(rawInput), &input) - require.NoError(t, err) - - NewHostFilter("myhost", nil).PatchSD(input) - - output, err := yaml.Marshal(input) - require.NoError(t, err) - require.YAMLEq(t, expect, string(output)) -} diff --git a/internal/static/metrics/instance/instance.go b/internal/static/metrics/instance/instance.go index a0de217627..db8e22109c 100644 --- a/internal/static/metrics/instance/instance.go +++ b/internal/static/metrics/instance/instance.go @@ -2,37 +2,18 @@ package instance import ( - "bytes" - "context" "crypto/md5" "encoding/hex" "encoding/json" "errors" "fmt" - "math" - "net/http" - "os" - "path/filepath" - "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/agentseed" - "github.com/grafana/agent/internal/static/metrics/wal" "github.com/grafana/agent/internal/useragent" - "github.com/grafana/agent/internal/util" - "github.com/oklog/run" - 
"github.com/prometheus/client_golang/prometheus" - config_util "github.com/prometheus/common/config" "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/model/relabel" - "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" - "go.uber.org/atomic" "gopkg.in/yaml.v2" ) @@ -195,578 +176,6 @@ func (c *Config) ApplyDefaults(global GlobalConfig) error { return nil } -// Clone makes a deep copy of the config along with global settings. -func (c *Config) Clone() (Config, error) { - bb, err := MarshalConfig(c, false) - if err != nil { - return Config{}, err - } - cp, err := UnmarshalConfig(bytes.NewReader(bb)) - if err != nil { - return Config{}, err - } - cp.global = c.global - - // Some tests will trip up on this; the marshal/unmarshal cycle might set - // an empty slice to nil. Set it back to an empty slice if we detect this - // happening. - if cp.ScrapeConfigs == nil && c.ScrapeConfigs != nil { - cp.ScrapeConfigs = []*config.ScrapeConfig{} - } - if cp.RemoteWrite == nil && c.RemoteWrite != nil { - cp.RemoteWrite = []*config.RemoteWriteConfig{} - } - - return *cp, nil -} - -type walStorageFactory func(reg prometheus.Registerer) (walStorage, error) - -// Instance is an individual metrics collector and remote_writer. -type Instance struct { - // All fields in the following block may be accessed and modified by - // concurrently running goroutines. - // - // Note that all Prometheus components listed here may be nil at any - // given time; methods reading them should take care to do nil checks. - mut sync.Mutex - cfg Config - wal walStorage - discovery *discoveryService - readyScrapeManager *readyScrapeManager - remoteStore *remote.Storage - storage storage.Storage - - // ready is set to true after the initialization process finishes - ready atomic.Bool - - hostFilter *HostFilter - - logger log.Logger - - reg prometheus.Registerer - newWal walStorageFactory - writeHandler http.Handler -} - -// New creates a new Instance with a directory for storing the WAL. The instance -// will not start until Run is called on the instance. -func New(reg prometheus.Registerer, cfg Config, walDir string, logger log.Logger) (*Instance, error) { - logger = log.With(logger, "instance", cfg.Name) - - instWALDir := filepath.Join(walDir, cfg.Name) - - newWal := func(reg prometheus.Registerer) (walStorage, error) { - return wal.NewStorage(logger, reg, instWALDir) - } - - return newInstance(cfg, reg, logger, newWal) -} - -func newInstance(cfg Config, reg prometheus.Registerer, logger log.Logger, newWal walStorageFactory) (*Instance, error) { - hostname, err := Hostname() - if err != nil { - return nil, fmt.Errorf("failed to get hostname: %w", err) - } - - i := &Instance{ - cfg: cfg, - logger: logger, - hostFilter: NewHostFilter(hostname, cfg.HostFilterRelabelConfigs), - - reg: reg, - newWal: newWal, - - readyScrapeManager: &readyScrapeManager{}, - } - - return i, nil -} - -// Run starts the instance, initializing Prometheus components, and will -// continue to run until an error happens during execution or the provided -// context is cancelled. -// -// Run may be re-called after exiting, as components will be reinitialized each -// time Run is called. 
-func (i *Instance) Run(ctx context.Context) error { - // i.cfg may change at any point in the middle of this method but not in a way - // that affects any of the code below; rather than grabbing a mutex every time - // we want to read the config, we'll simplify the access and just grab a copy - // now. - i.mut.Lock() - cfg := i.cfg - i.mut.Unlock() - - level.Debug(i.logger).Log("msg", "initializing instance", "name", cfg.Name) - - // trackingReg wraps the register for the instance to make sure that if Run - // exits, any metrics Prometheus registers are removed and can be - // re-registered if Run is called again. - trackingReg := util.WrapWithUnregisterer(i.reg) - defer trackingReg.UnregisterAll() - - if err := i.initialize(ctx, trackingReg, &cfg); err != nil { - level.Error(i.logger).Log("msg", "failed to initialize instance", "err", err) - return fmt.Errorf("failed to initialize instance: %w", err) - } - - // The actors defined here are defined in the order we want them to shut down. - // Primarily, we want to ensure that the following shutdown order is - // maintained: - // 1. The scrape manager stops - // 2. WAL storage is closed - // 3. Remote write storage is closed - // This is done to allow the instance to write stale markers for all active - // series. - rg := runGroupWithContext(ctx) - - { - // Target Discovery - rg.Add(i.discovery.Run, i.discovery.Stop) - } - { - // Truncation loop - ctx, contextCancel := context.WithCancel(context.Background()) - defer contextCancel() - rg.Add( - func() error { - i.truncateLoop(ctx, i.wal, &cfg) - level.Info(i.logger).Log("msg", "truncation loop stopped") - return nil - }, - func(err error) { - level.Info(i.logger).Log("msg", "stopping truncation loop...") - contextCancel() - }, - ) - } - { - sm, err := i.readyScrapeManager.Get() - if err != nil { - level.Error(i.logger).Log("msg", "failed to get scrape manager") - return err - } - - // Scrape manager - rg.Add( - func() error { - err := sm.Run(i.discovery.SyncCh()) - level.Info(i.logger).Log("msg", "scrape manager stopped") - return err - }, - func(err error) { - // The scrape manager is closed first to allow us to write staleness - // markers without receiving new samples from scraping in the meantime. - level.Info(i.logger).Log("msg", "stopping scrape manager...") - sm.Stop() - - // On a graceful shutdown, write staleness markers. If something went - // wrong, then the instance will be relaunched. - if err == nil && cfg.WriteStaleOnShutdown { - level.Info(i.logger).Log("msg", "writing staleness markers...") - err := i.wal.WriteStalenessMarkers(i.getRemoteWriteTimestamp) - if err != nil { - level.Error(i.logger).Log("msg", "error writing staleness markers", "err", err) - } - } - - // Closing the storage closes both the WAL storage and remote wrte - // storage. - level.Info(i.logger).Log("msg", "closing storage...") - if err := i.storage.Close(); err != nil { - level.Error(i.logger).Log("msg", "error stopping storage", "err", err) - } - }, - ) - } - - level.Debug(i.logger).Log("msg", "running instance", "name", cfg.Name) - i.ready.Store(true) - err := rg.Run() - if err != nil { - level.Error(i.logger).Log("msg", "agent instance stopped with error", "err", err) - } - return err -} - -// initialize sets up the various Prometheus components with their initial -// settings. initialize will be called each time the Instance is run. Prometheus -// components cannot be reused after they are stopped so we need to recreate them -// each run. 
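The shutdown ordering in Run relies on oklog/run semantics: when any actor's execute function returns, every actor's interrupt function is invoked, and Run returns once all execute functions have finished. A small two-actor sketch of that behavior, assuming only the github.com/oklog/run API this file already imports:

	package main

	import (
		"context"
		"fmt"

		"github.com/oklog/run"
	)

	func main() {
		ctx, cancel := context.WithCancel(context.Background())

		var g run.Group
		// Actor 1: a worker that runs until its context is cancelled.
		g.Add(func() error {
			<-ctx.Done()
			fmt.Println("worker stopped")
			return nil
		}, func(error) {
			cancel()
		})
		// Actor 2: returns immediately, which interrupts the whole group.
		g.Add(func() error {
			fmt.Println("trigger exiting")
			return nil
		}, func(error) {})

		fmt.Println(g.Run()) // <nil>: Run returns the first actor's error
	}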
-func (i *Instance) initialize(ctx context.Context, reg prometheus.Registerer, cfg *Config) error { - i.mut.Lock() - defer i.mut.Unlock() - - if cfg.HostFilter { - i.hostFilter.PatchSD(cfg.ScrapeConfigs) - } - - var err error - - i.wal, err = i.newWal(reg) - if err != nil { - return fmt.Errorf("error creating WAL: %w", err) - } - - i.writeHandler = remote.NewWriteHandler(i.logger, reg, i.wal) - - i.discovery, err = i.newDiscoveryManager(ctx, cfg) - if err != nil { - return fmt.Errorf("error creating discovery manager: %w", err) - } - - i.readyScrapeManager = &readyScrapeManager{} - - // Set up the remote storage - remoteLogger := log.With(i.logger, "component", "remote") - i.remoteStore = remote.NewStorage(remoteLogger, reg, i.wal.StartTime, i.wal.Directory(), cfg.RemoteFlushDeadline, i.readyScrapeManager) - uid := agentseed.Get().UID - for _, rw := range cfg.RemoteWrite { - if rw.Headers == nil { - rw.Headers = map[string]string{} - } - rw.Headers[agentseed.HeaderName] = uid - } - err = i.remoteStore.ApplyConfig(&config.Config{ - GlobalConfig: cfg.global.Prometheus, - RemoteWriteConfigs: cfg.RemoteWrite, - }) - if err != nil { - return fmt.Errorf("failed applying config to remote storage: %w", err) - } - - i.storage = storage.NewFanout(i.logger, i.wal, i.remoteStore) - - opts := &scrape.Options{ - ExtraMetrics: cfg.global.ExtraMetrics, - HTTPClientOptions: []config_util.HTTPClientOption{}, - } - - if cfg.global.DisableKeepAlives { - opts.HTTPClientOptions = append(opts.HTTPClientOptions, config_util.WithKeepAlivesDisabled()) - } - if cfg.global.IdleConnTimeout > 0 { - opts.HTTPClientOptions = append(opts.HTTPClientOptions, config_util.WithIdleConnTimeout(cfg.global.IdleConnTimeout)) - } - scrapeManager := newScrapeManager(opts, log.With(i.logger, "component", "scrape manager"), i.storage) - err = scrapeManager.ApplyConfig(&config.Config{ - GlobalConfig: cfg.global.Prometheus, - ScrapeConfigs: cfg.ScrapeConfigs, - }) - if err != nil { - return fmt.Errorf("failed applying config to scrape manager: %w", err) - } - - i.readyScrapeManager.Set(scrapeManager) - - return nil -} - -// Ready returns true if the Instance has been initialized and is ready -// to start scraping and delivering metrics. -func (i *Instance) Ready() bool { - return i.ready.Load() -} - -// Update accepts a new Config for the Instance and will dynamically update any -// running Prometheus components with the new values from Config. Update will -// return an ErrInvalidUpdate if the Update could not be applied. -func (i *Instance) Update(c Config) (err error) { - i.mut.Lock() - defer i.mut.Unlock() - - // It's only (currently) valid to update scrape_configs and remote_write, so - // if any other field has changed here, return the error. - switch { - // This first case should never happen in practice but it's included here for - // completion’s sake. - case i.cfg.Name != c.Name: - err = errImmutableField{Field: "name"} - case i.cfg.HostFilter != c.HostFilter: - err = errImmutableField{Field: "host_filter"} - case i.cfg.WALTruncateFrequency != c.WALTruncateFrequency: - err = errImmutableField{Field: "wal_truncate_frequency"} - case i.cfg.RemoteFlushDeadline != c.RemoteFlushDeadline: - err = errImmutableField{Field: "remote_flush_deadline"} - case i.cfg.WriteStaleOnShutdown != c.WriteStaleOnShutdown: - err = errImmutableField{Field: "write_stale_on_shutdown"} - } - if err != nil { - return ErrInvalidUpdate{Inner: err} - } - - // Check to see if the components exist yet. 
- if i.discovery == nil || i.remoteStore == nil || i.readyScrapeManager == nil { - return ErrInvalidUpdate{ - Inner: fmt.Errorf("cannot dynamically update because instance is not running"), - } - } - - // NOTE(rfratto): Prometheus applies configs in a specific order to ensure - // flow from service discovery down to the WAL continues working properly. - // - // Keep the following order below: - // - // 1. Local config - // 2. Remote Store - // 3. Scrape Manager - // 4. Discovery Manager - - originalConfig := i.cfg - defer func() { - if err != nil { - i.cfg = originalConfig - } - }() - i.cfg = c - - i.hostFilter.SetRelabels(c.HostFilterRelabelConfigs) - if c.HostFilter { - // N.B.: only call PatchSD if HostFilter is enabled since it - // mutates what targets will be discovered. - i.hostFilter.PatchSD(c.ScrapeConfigs) - } - - err = i.remoteStore.ApplyConfig(&config.Config{ - GlobalConfig: c.global.Prometheus, - RemoteWriteConfigs: c.RemoteWrite, - }) - if err != nil { - return fmt.Errorf("error applying new remote_write configs: %w", err) - } - - sm, err := i.readyScrapeManager.Get() - if err != nil { - return fmt.Errorf("couldn't get scrape manager to apply new scrape configs: %w", err) - } - err = sm.ApplyConfig(&config.Config{ - GlobalConfig: c.global.Prometheus, - ScrapeConfigs: c.ScrapeConfigs, - }) - if err != nil { - return fmt.Errorf("error applying updated configs to scrape manager: %w", err) - } - - sdConfigs := map[string]discovery.Configs{} - for _, v := range c.ScrapeConfigs { - sdConfigs[v.JobName] = v.ServiceDiscoveryConfigs - } - err = i.discovery.Manager.ApplyConfig(sdConfigs) - if err != nil { - return fmt.Errorf("failed applying configs to discovery manager: %w", err) - } - - return nil -} - -// TargetsActive returns the set of active targets from the scrape manager. Returns nil -// if the scrape manager is not ready yet. -func (i *Instance) TargetsActive() map[string][]*scrape.Target { - i.mut.Lock() - defer i.mut.Unlock() - - if i.readyScrapeManager == nil { - return nil - } - - mgr, err := i.readyScrapeManager.Get() - if err == ErrNotReady { - return nil - } else if err != nil { - level.Error(i.logger).Log("msg", "failed to get scrape manager when collecting active targets", "err", err) - return nil - } - return mgr.TargetsActive() -} - -// StorageDirectory returns the directory where this Instance is writing series -// and samples to for the WAL. -func (i *Instance) StorageDirectory() string { - return i.wal.Directory() -} - -// WriteHandler returns an HTTP handler for pushing metrics directly into the -// instance's WAL. -func (i *Instance) WriteHandler() http.Handler { - return i.writeHandler -} - -// Appender returns a storage.Appender from the instance's WAL -func (i *Instance) Appender(ctx context.Context) storage.Appender { - return i.wal.Appender(ctx) -} - -type discoveryService struct { - Manager *discovery.Manager - - RunFunc func() error - StopFunc func(err error) - SyncChFunc func() GroupChannel -} - -func (s *discoveryService) Run() error { return s.RunFunc() } -func (s *discoveryService) Stop(err error) { s.StopFunc(err) } -func (s *discoveryService) SyncCh() GroupChannel { return s.SyncChFunc() } - -// newDiscoveryManager returns an implementation of a runnable service -// that outputs discovered targets to a channel. The implementation -// uses the Prometheus Discovery Manager. Targets will be filtered -// if the instance is configured to perform host filtering. 
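Update above keeps the stored config consistent by snapshotting it and restoring the snapshot from a deferred function whenever the named return value err ends up non-nil. That rollback pattern in isolation, stdlib only (store and apply are illustrative names):

	package main

	import (
		"errors"
		"fmt"
	)

	type store struct{ cfg string }

	// apply swaps in the new config, rolling back to the previous one if any
	// later step fails. The deferred closure reads the named return value err.
	func (s *store) apply(newCfg string, step func() error) (err error) {
		old := s.cfg
		defer func() {
			if err != nil {
				s.cfg = old // roll back on failure
			}
		}()
		s.cfg = newCfg
		return step()
	}

	func main() {
		s := &store{cfg: "v1"}
		_ = s.apply("v2", func() error { return nil })
		fmt.Println(s.cfg) // v2

		err := s.apply("v3", func() error { return errors.New("boom") })
		fmt.Println(s.cfg, err) // v2 boom: v3 was rolled back
	}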
-func (i *Instance) newDiscoveryManager(ctx context.Context, cfg *Config) (*discoveryService, error) { - ctx, cancel := context.WithCancel(ctx) - - logger := log.With(i.logger, "component", "discovery manager") - manager := discovery.NewManager(ctx, logger, discovery.Name("scrape")) - - // TODO(rfratto): refactor this to a function? - // TODO(rfratto): ensure job name name is unique - c := map[string]discovery.Configs{} - for _, v := range cfg.ScrapeConfigs { - c[v.JobName] = v.ServiceDiscoveryConfigs - } - err := manager.ApplyConfig(c) - if err != nil { - cancel() - level.Error(i.logger).Log("msg", "failed applying config to discovery manager", "err", err) - return nil, fmt.Errorf("failed applying config to discovery manager: %w", err) - } - - rg := runGroupWithContext(ctx) - - // Run the manager - rg.Add(func() error { - err := manager.Run() - level.Info(i.logger).Log("msg", "discovery manager stopped") - return err - }, func(err error) { - level.Info(i.logger).Log("msg", "stopping discovery manager...") - cancel() - }) - - syncChFunc := manager.SyncCh - - // If host filtering is enabled, run it and use its channel for discovered - // targets. - if cfg.HostFilter { - rg.Add(func() error { - i.hostFilter.Run(manager.SyncCh()) - level.Info(i.logger).Log("msg", "host filterer stopped") - return nil - }, func(_ error) { - level.Info(i.logger).Log("msg", "stopping host filterer...") - i.hostFilter.Stop() - }) - - syncChFunc = i.hostFilter.SyncCh - } - - return &discoveryService{ - Manager: manager, - - RunFunc: rg.Run, - StopFunc: rg.Stop, - SyncChFunc: syncChFunc, - }, nil -} - -func (i *Instance) truncateLoop(ctx context.Context, wal walStorage, cfg *Config) { - // Track the last timestamp we truncated for to prevent segments from getting - // deleted until at least some new data has been sent. - var lastTs int64 = math.MinInt64 - - for { - select { - case <-ctx.Done(): - return - case <-time.After(cfg.WALTruncateFrequency): - // The timestamp ts is used to determine which series are not receiving - // samples and may be deleted from the WAL. Their most recent append - // timestamp is compared to ts, and if that timestamp is older than ts, - // they are considered inactive and may be deleted. - // - // Subtracting a duration from ts will delay when it will be considered - // inactive and scheduled for deletion. - ts := i.getRemoteWriteTimestamp() - i.cfg.MinWALTime.Milliseconds() - if ts < 0 { - ts = 0 - } - - // Network issues can prevent the result of getRemoteWriteTimestamp from - // changing. We don't want data in the WAL to grow forever, so we set a cap - // on the maximum age data can be. If our ts is older than this cutoff point, - // we'll shift it forward to start deleting very stale data. - if maxTS := timestamp.FromTime(time.Now().Add(-i.cfg.MaxWALTime)); ts < maxTS { - ts = maxTS - } - - if ts == lastTs { - level.Debug(i.logger).Log("msg", "not truncating the WAL, remote_write timestamp is unchanged", "ts", ts) - continue - } - lastTs = ts - - level.Debug(i.logger).Log("msg", "truncating the WAL", "ts", ts) - err := wal.Truncate(ts) - if err != nil { - // The only issue here is larger disk usage and a greater replay time, - // so we'll only log this as a warning. - level.Warn(i.logger).Log("msg", "could not truncate WAL", "err", err) - } - } - } -} - -// getRemoteWriteTimestamp looks up the last successful remote write timestamp. -// This is passed to wal.Storage for its truncation. If no remote write sections -// are configured, getRemoteWriteTimestamp returns the current time. 
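The truncation timestamp computed in truncateLoop is the last remote-write timestamp pushed back by min_wal_time, floored at zero, and clamped to be no older than now minus max_wal_time so the WAL cannot grow forever when remote write stalls. The same arithmetic in isolation (millisecond timestamps; time.Time.UnixMilli stands in for Prometheus's timestamp.FromTime; names are illustrative):

	package main

	import (
		"fmt"
		"time"
	)

	// truncateTS returns the WAL truncation timestamp in milliseconds:
	// lastSent minus minWALTime, floored at 0 and raised to now-maxWALTime
	// so very stale data is eventually deleted even if remote write stalls.
	func truncateTS(lastSent int64, now time.Time, minWALTime, maxWALTime time.Duration) int64 {
		ts := lastSent - minWALTime.Milliseconds()
		if ts < 0 {
			ts = 0
		}
		if maxTS := now.Add(-maxWALTime).UnixMilli(); ts < maxTS {
			ts = maxTS
		}
		return ts
	}

	func main() {
		now := time.Now()
		lastSent := now.Add(-30 * time.Minute).UnixMilli()
		// With a 5m min age and 4h cap, this truncates up to lastSent-5m.
		fmt.Println(truncateTS(lastSent, now, 5*time.Minute, 4*time.Hour))
	}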
-func (i *Instance) getRemoteWriteTimestamp() int64 { - i.mut.Lock() - defer i.mut.Unlock() - - if len(i.cfg.RemoteWrite) == 0 { - return timestamp.FromTime(time.Now()) - } - - if i.remoteStore == nil { - // Instance still being initialized; start at 0. - return 0 - } - return i.remoteStore.LowestSentTimestamp() -} - -// walStorage is an interface satisfied by wal.Storage, and created for testing. -type walStorage interface { - // walStorage implements Queryable/ChunkQueryable for compatibility, but is unused. - storage.Queryable - storage.ChunkQueryable - - Directory() string - - StartTime() (int64, error) - WriteStalenessMarkers(remoteTsFunc func() int64) error - Appender(context.Context) storage.Appender - Truncate(mint int64) error - - Close() error -} - -// Hostname retrieves the hostname identifying the machine the process is -// running on. It will return the value of $HOSTNAME, if defined, and fall -// back to Go's os.Hostname. -func Hostname() (string, error) { - hostname := os.Getenv("HOSTNAME") - if hostname != "" { - return hostname, nil - } - - hostname, err := os.Hostname() - if err != nil { - return "", fmt.Errorf("failed to get hostname: %w", err) - } - return hostname, nil -} - func getHash(data interface{}) (string, error) { bytes, err := json.Marshal(data) if err != nil { @@ -775,73 +184,3 @@ func getHash(data interface{}) (string, error) { hash := md5.Sum(bytes) return hex.EncodeToString(hash[:]), nil } - -var managerMtx sync.Mutex - -func newScrapeManager(o *scrape.Options, logger log.Logger, app storage.Appendable) *scrape.Manager { - // scrape.NewManager modifies a global variable in Prometheus. To avoid a - // data race of modifying that global, we lock a mutex here briefly. - managerMtx.Lock() - defer managerMtx.Unlock() - return scrape.NewManager(o, logger, app) -} - -type runGroupContext struct { - cancel context.CancelFunc - - g *run.Group -} - -// runGroupWithContext creates a new run.Group that will be stopped if the -// context gets canceled in addition to the normal behavior of stopping -// when any of the actors stop. -func runGroupWithContext(ctx context.Context) *runGroupContext { - ctx, cancel := context.WithCancel(ctx) - - var g run.Group - g.Add(func() error { - <-ctx.Done() - return nil - }, func(_ error) { - cancel() - }) - - return &runGroupContext{cancel: cancel, g: &g} -} - -func (rg *runGroupContext) Add(execute func() error, interrupt func(error)) { - rg.g.Add(execute, interrupt) -} - -func (rg *runGroupContext) Run() error { return rg.g.Run() } -func (rg *runGroupContext) Stop(_ error) { rg.cancel() } - -// ErrNotReady is returned when the scrape manager is used but has not been -// initialized yet. -var ErrNotReady = errors.New("Scrape manager not ready") - -// readyScrapeManager allows a scrape manager to be retrieved. Even if it's set at a later point in time. -type readyScrapeManager struct { - mtx sync.RWMutex - m *scrape.Manager -} - -// Set the scrape manager. -func (rm *readyScrapeManager) Set(m *scrape.Manager) { - rm.mtx.Lock() - defer rm.mtx.Unlock() - - rm.m = m -} - -// Get the scrape manager. If is not ready, return an error. 
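newScrapeManager above exists solely to serialise a constructor that mutates a package-level global inside Prometheus. The guard pattern in miniature, stdlib only (globalState stands in for the library-owned global):

	package main

	import (
		"fmt"
		"sync"
	)

	var (
		mu          sync.Mutex
		globalState int // stands in for a library-owned global
	)

	// newThing touches the package global; the mutex makes concurrent
	// constructions race-free, mirroring managerMtx above.
	func newThing() int {
		mu.Lock()
		defer mu.Unlock()
		globalState++
		return globalState
	}

	func main() {
		var wg sync.WaitGroup
		for i := 0; i < 4; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				fmt.Println(newThing())
			}()
		}
		wg.Wait()
	}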
-func (rm *readyScrapeManager) Get() (*scrape.Manager, error) { - rm.mtx.RLock() - defer rm.mtx.RUnlock() - - if rm.m != nil { - return rm.m, nil - } - - return nil, ErrNotReady -} diff --git a/internal/static/metrics/instance/instance_integration_test.go b/internal/static/metrics/instance/instance_integration_test.go deleted file mode 100644 index 71fc2ed8b5..0000000000 --- a/internal/static/metrics/instance/instance_integration_test.go +++ /dev/null @@ -1,281 +0,0 @@ -package instance - -import ( - "context" - "fmt" - "net" - "net/http" - "os" - "strings" - "sync" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/backoff" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" -) - -var slowBackoff = backoff.Config{ - MinBackoff: 1 * time.Second, - MaxBackoff: 1 * time.Minute, - MaxRetries: 10, -} - -// TestInstance_Update performs a full integration test by doing the following: -// -// 1. Launching an HTTP server which can be scraped and also mocks the remote_write -// endpoint. -// 2. Creating an instance config with no scrape_configs or remote_write configs. -// 3. Updates the instance with a scrape_config and remote_write. -// 4. Validates that after 15 seconds, the scrape endpoint and remote_write -// endpoint has been called. -func TestInstance_Update(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - - walDir := t.TempDir() - - var ( - scraped = atomic.NewBool(false) - pushed = atomic.NewBool(false) - ) - - r := mux.NewRouter() - r.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { - scraped.Store(true) - promhttp.Handler().ServeHTTP(w, r) - }) - r.HandleFunc("/push", func(w http.ResponseWriter, r *http.Request) { - pushed.Store(true) - // We don't particularly care what was pushed to us, so we'll ignore - // everything here; we just want to make sure the endpoint was invoked. - }) - - // Start a server for exposing the router. - l, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - defer l.Close() - go func() { - _ = http.Serve(l, r) - }() - - // Create a new instance where it's not scraping or writing anything by default. - initialConfig := loadConfig(t, ` -name: integration_test -scrape_configs: [] -remote_write: [] -`) - inst, err := New(prometheus.NewRegistry(), initialConfig, walDir, logger) - require.NoError(t, err) - - instCtx, cancel := context.WithCancel(context.Background()) - var wg sync.WaitGroup - defer func() { - cancel() - wg.Wait() - }() - - wg.Add(1) - go func() { - defer wg.Done() - err := inst.Run(instCtx) - require.NoError(t, err) - }() - - // Update the config with a single scrape_config and remote_write. - newConfig := loadConfig(t, fmt.Sprintf(` -name: integration_test -scrape_configs: - - job_name: test_scrape - scrape_interval: 5s - static_configs: - - targets: ['%[1]s'] -remote_write: - - url: http://%[1]s/push -`, l.Addr())) - - // Wait for the instance to be ready before updating. 
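The integration test above stands up a single HTTP server that plays both the scrape target and the remote_write endpoint, recording hits in atomic flags so assertions can poll for them. A compact stdlib version of that fixture, using net/http/httptest and sync/atomic in place of the raw listener and go.uber.org/atomic used above (names illustrative):

	package main

	import (
		"fmt"
		"net/http"
		"net/http/httptest"
		"sync/atomic"
	)

	func main() {
		var scraped, pushed atomic.Bool

		mux := http.NewServeMux()
		mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
			scraped.Store(true)
			fmt.Fprintln(w, "test_metric_total 1")
		})
		mux.HandleFunc("/push", func(w http.ResponseWriter, r *http.Request) {
			// The payload is irrelevant; only the fact of the call matters.
			pushed.Store(true)
		})

		srv := httptest.NewServer(mux)
		defer srv.Close()

		http.Get(srv.URL + "/metrics")
		http.Post(srv.URL+"/push", "application/x-protobuf", nil)
		fmt.Println(scraped.Load(), pushed.Load()) // true true
	}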
- util.EventuallyWithBackoff(t, func(t require.TestingT) { - require.True(t, inst.Ready()) - }, slowBackoff) - - // Wait for the instance to update (it might not be ready yet and would - // return an error until everything is initialized), and then wait again for - // the configs to apply and set the scraped and pushed atomic variables, - // indicating that the Prometheus components successfully updated. - util.EventuallyWithBackoff(t, func(t require.TestingT) { - err := inst.Update(newConfig) - if err != nil { - logger.Log("msg", "failed to update instance", "err", err) - } - require.NoError(t, err) - }, slowBackoff) - - util.EventuallyWithBackoff(t, func(t require.TestingT) { - require.True(t, scraped.Load() && pushed.Load()) - }, slowBackoff) -} - -func TestInstance_Update_Failed(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - - walDir := t.TempDir() - - r := mux.NewRouter() - r.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { - promhttp.Handler().ServeHTTP(w, r) - }) - r.HandleFunc("/push", func(w http.ResponseWriter, r *http.Request) {}) - - // Start a server for exposing the router. - l, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - defer l.Close() - go func() { - _ = http.Serve(l, r) - }() - - // Create a new instance where it's not scraping or writing anything by default. - initialConfig := loadConfig(t, ` -name: integration_test -scrape_configs: [] -remote_write: [] -`) - inst, err := New(prometheus.NewRegistry(), initialConfig, walDir, logger) - require.NoError(t, err) - - instCtx, cancel := context.WithCancel(context.Background()) - var wg sync.WaitGroup - defer func() { - cancel() - wg.Wait() - }() - - wg.Add(1) - go func() { - defer wg.Done() - err := inst.Run(instCtx) - require.NoError(t, err) - }() - - // Create a new config to use for updating - newConfig := loadConfig(t, fmt.Sprintf(` -name: integration_test -scrape_configs: - - job_name: test_scrape - scrape_interval: 5s - static_configs: - - targets: ['%[1]s'] -remote_write: - - url: http://%[1]s/push -`, l.Addr())) - - // Make sure the instance can successfully update first - util.Eventually(t, func(t require.TestingT) { - err := inst.Update(newConfig) - if err != nil { - logger.Log("msg", "failed to update instance", "err", err) - } - require.NoError(t, err) - }) - - // Now force an update back to the original config to fail - inst.readyScrapeManager.Set(nil) - require.NotNil(t, inst.Update(initialConfig), "update should have failed") - require.Equal(t, newConfig, inst.cfg, "config did not roll back") -} - -// TestInstance_Update_InvalidChanges runs an instance with a blank initial -// config and performs various unacceptable updates that should return an -// error. -func TestInstance_Update_InvalidChanges(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - - walDir := t.TempDir() - - // Create a new instance where it's not scraping or writing anything by default. - initialConfig := loadConfig(t, ` -name: integration_test -scrape_configs: [] -remote_write: [] -`) - inst, err := New(prometheus.NewRegistry(), initialConfig, walDir, logger) - require.NoError(t, err) - - instCtx, cancel := context.WithCancel(context.Background()) - var wg sync.WaitGroup - defer func() { - cancel() - wg.Wait() - }() - - wg.Add(1) - go func() { - defer wg.Done() - err := inst.Run(instCtx) - require.NoError(t, err) - }() - - // Do a no-op update that succeeds to ensure that the instance is running. 
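util.EventuallyWithBackoff above retries an assertion until it passes or the retry budget is spent. A self-contained sketch under those assumed semantics; the helper name and signature here are invented, stdlib only:

	package main

	import (
		"errors"
		"fmt"
		"time"
	)

	// eventually retries check with exponential backoff capped at max,
	// returning the last error if all attempts fail.
	func eventually(check func() error, min, max time.Duration, retries int) error {
		delay := min
		var err error
		for i := 0; i < retries; i++ {
			if err = check(); err == nil {
				return nil
			}
			time.Sleep(delay)
			if delay *= 2; delay > max {
				delay = max
			}
		}
		return err
	}

	func main() {
		n := 0
		err := eventually(func() error {
			if n++; n < 3 {
				return errors.New("not ready")
			}
			return nil
		}, 10*time.Millisecond, 100*time.Millisecond, 10)
		fmt.Println(n, err) // 3 <nil>
	}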
- util.Eventually(t, func(t require.TestingT) { - err := inst.Update(initialConfig) - if err != nil { - logger.Log("msg", "failed to update instance", "err", err) - } - require.NoError(t, err) - }) - - tt := []struct { - name string - mut func(c *Config) - expect string - }{ - { - name: "name changed", - mut: func(c *Config) { c.Name = "changed name" }, - expect: "name cannot be changed dynamically", - }, - { - name: "host_filter changed", - mut: func(c *Config) { c.HostFilter = true }, - expect: "host_filter cannot be changed dynamically", - }, - { - name: "wal_truncate_frequency changed", - mut: func(c *Config) { c.WALTruncateFrequency *= 2 }, - expect: "wal_truncate_frequency cannot be changed dynamically", - }, - { - name: "remote_flush_deadline changed", - mut: func(c *Config) { c.RemoteFlushDeadline *= 2 }, - expect: "remote_flush_deadline cannot be changed dynamically", - }, - { - name: "write_stale_on_shutdown changed", - mut: func(c *Config) { c.WriteStaleOnShutdown = true }, - expect: "write_stale_on_shutdown cannot be changed dynamically", - }, - } - - for _, tc := range tt { - t.Run(tc.name, func(t *testing.T) { - mutatedConfig := initialConfig - tc.mut(&mutatedConfig) - - err := inst.Update(mutatedConfig) - require.EqualError(t, err, tc.expect) - }) - } -} - -func loadConfig(t *testing.T, s string) Config { - cfg, err := UnmarshalConfig(strings.NewReader(s)) - require.NoError(t, err) - require.NoError(t, cfg.ApplyDefaults(DefaultGlobalConfig)) - return *cfg -} diff --git a/internal/static/metrics/instance/instance_test.go b/internal/static/metrics/instance/instance_test.go index 0f97aecac2..33e6551f03 100644 --- a/internal/static/metrics/instance/instance_test.go +++ b/internal/static/metrics/instance/instance_test.go @@ -1,28 +1,13 @@ package instance import ( - "context" "fmt" - "net/http/httptest" - "os" - "path" "strings" - "sync" "testing" - "time" - "github.com/go-kit/log" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/model/exemplar" - "github.com/prometheus/prometheus/model/histogram" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/metadata" - "github.com/prometheus/prometheus/storage" "github.com/stretchr/testify/require" ) @@ -184,238 +169,3 @@ remote_write: require.NoError(t, cfg.ApplyDefaults(DefaultGlobalConfig)) require.NotEmpty(t, cfg.RemoteWrite[0].Name) } - -func TestInstance_Path(t *testing.T) { - scrapeAddr, closeSrv := getTestServer(t) - defer closeSrv() - - walDir := t.TempDir() - - globalConfig := getTestGlobalConfig(t) - - cfg := getTestConfig(t, &globalConfig, scrapeAddr) - cfg.WALTruncateFrequency = time.Hour - cfg.RemoteFlushDeadline = time.Hour - - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - inst, err := New(prometheus.NewRegistry(), cfg, walDir, logger) - require.NoError(t, err) - runInstance(t, inst) - - // / path should exist for WAL - util.Eventually(t, func(t require.TestingT) { - _, err := os.Stat(path.Join(walDir, "test")) - require.NoError(t, err) - }) -} - -// TestInstance tests that discovery and scraping are working by using a mock -// instance of the WAL storage and testing that samples get written to it. -// This test touches most of Instance and is enough for a basic integration test. 
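The immutable-field cases above use a table of mutator functions, each deriving one invalid config from a shared baseline. A test-file sketch of that pattern on a toy type (stdlib testing only; Config and validate here are stand-ins, not the agent's types):

	package example

	import (
		"fmt"
		"testing"
	)

	type Config struct {
		Name string
		Port int
	}

	// validate rejects changes to the immutable Name field.
	func validate(old, next Config) error {
		if old.Name != next.Name {
			return fmt.Errorf("name cannot be changed dynamically")
		}
		return nil
	}

	func TestValidate(t *testing.T) {
		base := Config{Name: "a", Port: 80}

		tt := []struct {
			name   string
			mut    func(c *Config) // derives one case from the baseline
			expect string
		}{
			{"name changed", func(c *Config) { c.Name = "b" }, "name cannot be changed dynamically"},
		}

		for _, tc := range tt {
			t.Run(tc.name, func(t *testing.T) {
				mutated := base // Config has no reference fields, so a copy is safe
				tc.mut(&mutated)
				if err := validate(base, mutated); err == nil || err.Error() != tc.expect {
					t.Fatalf("got %v, want %q", err, tc.expect)
				}
			})
		}
	}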
-func TestInstance(t *testing.T) { - scrapeAddr, closeSrv := getTestServer(t) - defer closeSrv() - - walDir := t.TempDir() - - globalConfig := getTestGlobalConfig(t) - cfg := getTestConfig(t, &globalConfig, scrapeAddr) - cfg.WALTruncateFrequency = time.Hour - cfg.RemoteFlushDeadline = time.Hour - - mockStorage := mockWalStorage{ - series: make(map[storage.SeriesRef]int), - directory: walDir, - } - newWal := func(_ prometheus.Registerer) (walStorage, error) { return &mockStorage, nil } - - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - inst, err := newInstance(cfg, nil, logger, newWal) - require.NoError(t, err) - runInstance(t, inst) - - // Wait until mockWalStorage has had a series added to it. - util.EventuallyWithBackoff(t, func(t require.TestingT) { - mockStorage.mut.Lock() - defer mockStorage.mut.Unlock() - require.True(t, len(mockStorage.series) > 0) - }, slowBackoff) -} - -// TestInstance_Recreate ensures that creating an instance with the same name twice -// does not cause any duplicate metrics registration that leads to a panic. -func TestInstance_Recreate(t *testing.T) { - scrapeAddr, closeSrv := getTestServer(t) - defer closeSrv() - - walDir := t.TempDir() - - globalConfig := getTestGlobalConfig(t) - - cfg := getTestConfig(t, &globalConfig, scrapeAddr) - cfg.Name = "recreate_test" - cfg.WALTruncateFrequency = time.Hour - cfg.RemoteFlushDeadline = time.Hour - - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - currentReg := prometheus.NewRegistry() - inst, err := New(currentReg, cfg, walDir, logger) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - exited := make(chan bool) - go func() { - err := inst.Run(ctx) - close(exited) - - if err != nil { - require.Equal(t, context.Canceled, err) - } - }() - - time.Sleep(1 * time.Second) - cancel() - <-exited - - // Recreate the instance, no panic should happen. 
- require.NotPanics(t, func() { - inst, err := New(currentReg, cfg, walDir, logger) - require.NoError(t, err) - runInstance(t, inst) - - time.Sleep(1 * time.Second) - }) -} - -func getTestServer(t *testing.T) (addr string, closeFunc func()) { - t.Helper() - - reg := prometheus.NewRegistry() - - testCounter := prometheus.NewCounter(prometheus.CounterOpts{ - Name: "test_metric_total", - }) - testCounter.Inc() - reg.MustRegister(testCounter) - - handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{}) - httpSrv := httptest.NewServer(handler) - return httpSrv.Listener.Addr().String(), httpSrv.Close -} - -func getTestGlobalConfig(t *testing.T) GlobalConfig { - t.Helper() - - return GlobalConfig{ - Prometheus: config.GlobalConfig{ - ScrapeInterval: model.Duration(time.Millisecond * 50), - ScrapeTimeout: model.Duration(time.Millisecond * 25), - EvaluationInterval: model.Duration(time.Hour), - }, - } -} - -func getTestConfig(t *testing.T, global *GlobalConfig, scrapeAddr string) Config { - t.Helper() - - scrapeCfg := config.DefaultScrapeConfig - scrapeCfg.JobName = "test" - scrapeCfg.ScrapeInterval = global.Prometheus.ScrapeInterval - scrapeCfg.ScrapeTimeout = global.Prometheus.ScrapeTimeout - scrapeCfg.ServiceDiscoveryConfigs = discovery.Configs{ - discovery.StaticConfig{{ - Targets: []model.LabelSet{{ - model.AddressLabel: model.LabelValue(scrapeAddr), - }}, - Labels: model.LabelSet{}, - }}, - } - - cfg := DefaultConfig - cfg.Name = "test" - cfg.ScrapeConfigs = []*config.ScrapeConfig{&scrapeCfg} - cfg.global = *global - - return cfg -} - -type mockWalStorage struct { - storage.Queryable - storage.ChunkQueryable - - directory string - mut sync.Mutex - series map[storage.SeriesRef]int -} - -func (s *mockWalStorage) Directory() string { return s.directory } -func (s *mockWalStorage) StartTime() (int64, error) { return 0, nil } -func (s *mockWalStorage) WriteStalenessMarkers(f func() int64) error { return nil } -func (s *mockWalStorage) Close() error { return nil } -func (s *mockWalStorage) Truncate(mint int64) error { return nil } - -func (s *mockWalStorage) Appender(context.Context) storage.Appender { - return &mockAppender{s: s} -} - -type mockAppender struct { - s *mockWalStorage -} - -func (a *mockAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - if ref == 0 { - return a.Add(l, t, v) - } - return ref, a.AddFast(ref, t, v) -} - -// Add adds a new series and sets its written count to 1. -func (a *mockAppender) Add(l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - a.s.mut.Lock() - defer a.s.mut.Unlock() - - hash := l.Hash() - a.s.series[storage.SeriesRef(hash)] = 1 - return storage.SeriesRef(hash), nil -} - -// AddFast increments the number of writes to an existing series. 
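TestInstance_Recreate above guards against duplicate registration on a shared registry, which is why Run wraps its registerer in an unregistering tracker. For reference, how duplicate registration surfaces in client_golang and the conventional recovery, assuming only the prometheus.Registry API these tests already use:

	package main

	import (
		"errors"
		"fmt"

		"github.com/prometheus/client_golang/prometheus"
	)

	func main() {
		reg := prometheus.NewRegistry()

		c := prometheus.NewCounter(prometheus.CounterOpts{Name: "test_metric_total"})
		fmt.Println(reg.Register(c)) // <nil>

		// A second collector with the same descriptor is rejected with an
		// error; MustRegister would panic here instead.
		dup := prometheus.NewCounter(prometheus.CounterOpts{Name: "test_metric_total"})
		err := reg.Register(dup)

		var are prometheus.AlreadyRegisteredError
		if errors.As(err, &are) {
			// Reuse the collector that is already registered.
			c = are.ExistingCollector.(prometheus.Counter)
		}
		c.Inc()
	}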
-func (a *mockAppender) AddFast(ref storage.SeriesRef, t int64, v float64) error { - a.s.mut.Lock() - defer a.s.mut.Unlock() - _, ok := a.s.series[ref] - if !ok { - return storage.ErrNotFound - } - - a.s.series[ref]++ - return nil -} - -func (a *mockAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { - return 0, nil -} - -func (a *mockAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { - return 0, nil -} - -func (a *mockAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { - return 0, nil -} - -func (a *mockAppender) Commit() error { - return nil -} - -func (a *mockAppender) Rollback() error { - return nil -} - -func runInstance(t *testing.T, i *Instance) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(func() { cancel() }) - go require.NotPanics(t, func() { - _ = i.Run(ctx) - }) -} diff --git a/internal/static/metrics/instance/manager.go b/internal/static/metrics/instance/manager.go deleted file mode 100644 index 6bb90324fd..0000000000 --- a/internal/static/metrics/instance/manager.go +++ /dev/null @@ -1,379 +0,0 @@ -package instance - -import ( - "context" - "errors" - "fmt" - "net/http" - "sync" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" -) - -var ( - instanceAbnormalExits = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "agent_metrics_instance_abnormal_exits_total", - Help: "Total number of times a Prometheus instance exited unexpectedly, causing it to be restarted.", - }, []string{"instance_name"}) - - currentActiveInstances = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "agent_metrics_active_instances", - Help: "Current number of active instances being used by the agent.", - }) - - // DefaultBasicManagerConfig is the default config for the BasicManager. - DefaultBasicManagerConfig = BasicManagerConfig{ - InstanceRestartBackoff: 5 * time.Second, - } -) - -// Manager represents a set of methods for manipulating running instances at -// runtime. -type Manager interface { - // GetInstance retrieves a ManagedInstance by name. - GetInstance(name string) (ManagedInstance, error) - - // ListInstances returns all currently managed instances running - // within the Manager. The key will be the instance name from their config. - ListInstances() map[string]ManagedInstance - - // ListConfigs returns the config objects associated with a managed - // instance. The key will be the Name field from Config. - ListConfigs() map[string]Config - - // ApplyConfig creates a new Config or updates an existing Config if - // one with Config.Name already exists. - ApplyConfig(Config) error - - // DeleteConfig deletes a given managed instance based on its Config.Name. - DeleteConfig(name string) error - - // Stop stops the Manager and all managed instances. - Stop() -} - -// ManagedInstance is implemented by Instance. It is defined as an interface -// for the sake of testing from Manager implementations. 
-type ManagedInstance interface { - Run(ctx context.Context) error - Ready() bool - Update(c Config) error - TargetsActive() map[string][]*scrape.Target - StorageDirectory() string - Appender(ctx context.Context) storage.Appender - WriteHandler() http.Handler -} - -// BasicManagerConfig controls the operations of a BasicManager. -type BasicManagerConfig struct { - InstanceRestartBackoff time.Duration -} - -// BasicManager creates a new BasicManager, implementing the Manager interface. -// BasicManager will directly launch instances and perform no extra processing. -// -// Other implementations of Manager usually wrap a BasicManager. -type BasicManager struct { - cfgMut sync.Mutex - cfg BasicManagerConfig - logger log.Logger - - // Take care when locking mut: if you hold onto a lock of mut while calling - // Stop on a process, you will deadlock. - mut sync.Mutex - processes map[string]*managedProcess - - launch Factory -} - -// managedProcess represents a goroutine running a ManagedInstance. cancel -// requests that the goroutine should shutdown. done will be closed after the -// goroutine exists. -type managedProcess struct { - cfg Config - inst ManagedInstance - cancel context.CancelFunc - done chan bool -} - -func (p managedProcess) Stop() { - p.cancel() - <-p.done -} - -// Factory should return an unstarted instance given some config. -type Factory func(c Config) (ManagedInstance, error) - -// NewBasicManager creates a new BasicManager. The launch function will be -// invoked any time a new Config is applied. -// -// The lifecycle of any ManagedInstance returned by the launch function will -// be handled by the BasicManager. Instances will be automatically restarted -// if stopped, updated if the config changes, or removed when the Config is -// deleted. -func NewBasicManager(cfg BasicManagerConfig, logger log.Logger, launch Factory) *BasicManager { - return &BasicManager{ - cfg: cfg, - logger: logger, - processes: make(map[string]*managedProcess), - launch: launch, - } -} - -// UpdateManagerConfig updates the BasicManagerConfig. -func (m *BasicManager) UpdateManagerConfig(c BasicManagerConfig) { - m.cfgMut.Lock() - defer m.cfgMut.Unlock() - m.cfg = c -} - -// GetInstance returns the given instance by name. -func (m *BasicManager) GetInstance(name string) (ManagedInstance, error) { - m.mut.Lock() - defer m.mut.Unlock() - - process, ok := m.processes[name] - if !ok { - return nil, fmt.Errorf("instance %s does not exist", name) - } - return process.inst, nil -} - -// ListInstances returns the current active instances managed by BasicManager. -func (m *BasicManager) ListInstances() map[string]ManagedInstance { - m.mut.Lock() - defer m.mut.Unlock() - - res := make(map[string]ManagedInstance, len(m.processes)) - for name, process := range m.processes { - if process == nil { - continue - } - res[name] = process.inst - } - return res -} - -// ListConfigs lists the current active configs managed by BasicManager. -func (m *BasicManager) ListConfigs() map[string]Config { - m.mut.Lock() - defer m.mut.Unlock() - - res := make(map[string]Config, len(m.processes)) - for name, process := range m.processes { - res[name] = process.cfg - } - return res -} - -// ApplyConfig takes a Config and either starts a new managed instance or -// updates an existing managed instance. The value for Name in c is used to -// uniquely identify the Config and determine whether the Config has an -// existing associated managed instance. 
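managedProcess.Stop above is the classic cancel-then-wait handshake: cancel the goroutine's context, then block on a channel the goroutine closes on exit, so no work races past shutdown. The minimal form, stdlib only:

	package main

	import (
		"context"
		"fmt"
	)

	type process struct {
		cancel context.CancelFunc
		done   chan struct{}
	}

	// start launches work and returns a handle whose Stop blocks until the
	// goroutine has fully exited.
	func start(work func(ctx context.Context)) *process {
		ctx, cancel := context.WithCancel(context.Background())
		p := &process{cancel: cancel, done: make(chan struct{})}
		go func() {
			defer close(p.done)
			work(ctx)
		}()
		return p
	}

	func (p *process) Stop() {
		p.cancel()
		<-p.done
	}

	func main() {
		p := start(func(ctx context.Context) {
			<-ctx.Done()
			fmt.Println("worker exiting")
		})
		p.Stop()
		fmt.Println("stopped") // prints only after the worker has exited
	}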
-func (m *BasicManager) ApplyConfig(c Config) error { - m.mut.Lock() - defer m.mut.Unlock() - - // If the config already exists, we need to update it. - proc, ok := m.processes[c.Name] - if ok { - err := proc.inst.Update(c) - - // If the instance could not be dynamically updated, we need to force the - // update by restarting it. If it failed for another reason, something - // serious went wrong and we'll completely give up without stopping the - // existing job. - if errors.Is(err, ErrInvalidUpdate{}) { - level.Info(m.logger).Log("msg", "could not dynamically update instance, will manually restart", "instance", c.Name, "reason", err) - - // NOTE: we don't return here; we fall through to spawn the new instance. - proc.Stop() - } else if err != nil { - return fmt.Errorf("failed to update instance %s: %w", c.Name, err) - } else { - level.Info(m.logger).Log("msg", "dynamically updated instance", "instance", c.Name) - - proc.cfg = c - return nil - } - } - - // Spawn a new process for the new config. - err := m.spawnProcess(c) - if err != nil { - return err - } - - currentActiveInstances.Inc() - return nil -} - -func (m *BasicManager) spawnProcess(c Config) error { - inst, err := m.launch(c) - if err != nil { - return err - } - - ctx, cancel := context.WithCancel(context.Background()) - done := make(chan bool) - - proc := &managedProcess{ - cancel: cancel, - done: done, - cfg: c, - inst: inst, - } - m.processes[c.Name] = proc - - go func() { - m.runProcess(ctx, c.Name, inst) - close(done) - - // Now that the process has stopped, we can remove it from our managed - // list. - // - // However, it's possible that a new Config may have been applied and - // overwrote the initial value in our map. We only want to delete the - // process from the map if it hasn't changed from what we initially - // set it to. - // - // We only use the instance for comparing (which will never change) because - // the instance may have dynamically been given a new config since this - // goroutine started. - m.mut.Lock() - if storedProc, exist := m.processes[c.Name]; exist && storedProc.inst == inst { - delete(m.processes, c.Name) - } - m.mut.Unlock() - - currentActiveInstances.Dec() - }() - - return nil -} - -// runProcess runs and instance and keeps it alive until it is explicitly stopped -// by cancelling the context. -func (m *BasicManager) runProcess(ctx context.Context, name string, inst ManagedInstance) { - for { - err := inst.Run(ctx) - if err != nil && err != context.Canceled { - backoff := m.instanceRestartBackoff() - - instanceAbnormalExits.WithLabelValues(name).Inc() - level.Error(m.logger).Log("msg", "instance stopped abnormally, restarting after backoff period", "err", err, "backoff", backoff, "instance", name) - time.Sleep(backoff) - } else { - level.Info(m.logger).Log("msg", "stopped instance", "instance", name) - break - } - } -} - -func (m *BasicManager) instanceRestartBackoff() time.Duration { - m.cfgMut.Lock() - defer m.cfgMut.Unlock() - return m.cfg.InstanceRestartBackoff -} - -// DeleteConfig removes a managed instance by its config name. Returns an error -// if there is no such managed instance with the given name. -func (m *BasicManager) DeleteConfig(name string) error { - m.mut.Lock() - proc, ok := m.processes[name] - if !ok { - m.mut.Unlock() - return errors.New("config does not exist") - } - m.mut.Unlock() - - // spawnProcess is responsible for removing the process from the map after it - // stops so we don't need to delete anything from m.processes here. 
- proc.Stop() - return nil -} - -// Stop stops the BasicManager and stops all active processes for configs. -func (m *BasicManager) Stop() { - var wg sync.WaitGroup - - // We don't need to change m.processes here; processes remove themselves - // from the map (in spawnProcess). - m.mut.Lock() - wg.Add(len(m.processes)) - for _, proc := range m.processes { - go func(proc *managedProcess) { - proc.Stop() - wg.Done() - }(proc) - } - m.mut.Unlock() - - wg.Wait() -} - -// MockManager exposes methods of the Manager interface as struct fields. -// Useful for tests. -type MockManager struct { - GetInstanceFunc func(name string) (ManagedInstance, error) - ListInstancesFunc func() map[string]ManagedInstance - ListConfigsFunc func() map[string]Config - ApplyConfigFunc func(Config) error - DeleteConfigFunc func(name string) error - StopFunc func() -} - -// GetInstance implements Manager. -func (m MockManager) GetInstance(name string) (ManagedInstance, error) { - if m.GetInstanceFunc != nil { - return m.GetInstanceFunc(name) - } - panic("GetInstanceFunc not implemented") -} - -// ListInstances implements Manager. -func (m MockManager) ListInstances() map[string]ManagedInstance { - if m.ListInstancesFunc != nil { - return m.ListInstancesFunc() - } - panic("ListInstancesFunc not implemented") -} - -// ListConfigs implements Manager. -func (m MockManager) ListConfigs() map[string]Config { - if m.ListConfigsFunc != nil { - return m.ListConfigsFunc() - } - panic("ListConfigsFunc not implemented") -} - -// ApplyConfig implements Manager. -func (m MockManager) ApplyConfig(c Config) error { - if m.ApplyConfigFunc != nil { - return m.ApplyConfigFunc(c) - } - panic("ApplyConfigFunc not implemented") -} - -// DeleteConfig implements Manager. -func (m MockManager) DeleteConfig(name string) error { - if m.DeleteConfigFunc != nil { - return m.DeleteConfigFunc(name) - } - panic("DeleteConfigFunc not implemented") -} - -// Stop implements Manager. 
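runProcess above is a small supervisor loop: rerun the instance after a backoff on abnormal exit, and stop cleanly when the exit was deliberate. The skeleton of that loop, stdlib only (supervise and run are illustrative names; errors.Is replaces the direct comparison used above):

	package main

	import (
		"context"
		"errors"
		"fmt"
		"time"
	)

	// supervise reruns run until it returns nil or context.Canceled,
	// sleeping for backoff after every abnormal exit.
	func supervise(ctx context.Context, backoff time.Duration, run func(context.Context) error) {
		for {
			err := run(ctx)
			if err != nil && !errors.Is(err, context.Canceled) {
				fmt.Println("abnormal exit, restarting:", err)
				time.Sleep(backoff)
				continue
			}
			fmt.Println("stopped")
			return
		}
	}

	func main() {
		attempts := 0
		supervise(context.Background(), 10*time.Millisecond, func(ctx context.Context) error {
			if attempts++; attempts < 3 {
				return errors.New("crash")
			}
			return nil
		})
		fmt.Println(attempts) // 3: two crashes, then a clean stop
	}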
-func (m MockManager) Stop() { - if m.StopFunc != nil { - m.StopFunc() - return - } - panic("StopFunc not implemented") -} diff --git a/internal/static/metrics/instance/manager_test.go b/internal/static/metrics/instance/manager_test.go deleted file mode 100644 index 6afed26732..0000000000 --- a/internal/static/metrics/instance/manager_test.go +++ /dev/null @@ -1,158 +0,0 @@ -package instance - -import ( - "context" - "fmt" - "net/http" - "os" - "testing" - - "github.com/go-kit/log" - "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" - "github.com/stretchr/testify/require" -) - -func TestBasicManager_ApplyConfig(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - - baseMock := mockInstance{ - RunFunc: func(ctx context.Context) error { - logger.Log("msg", "starting an instance") - <-ctx.Done() - return nil - }, - UpdateFunc: func(c Config) error { - return nil - }, - TargetsActiveFunc: func() map[string][]*scrape.Target { - return nil - }, - } - - t.Run("dynamic update successful", func(t *testing.T) { - spawnedCount := 0 - spawner := func(c Config) (ManagedInstance, error) { - spawnedCount++ - - newMock := baseMock - return &newMock, nil - } - - cm := NewBasicManager(DefaultBasicManagerConfig, logger, spawner) - - for i := 0; i < 10; i++ { - err := cm.ApplyConfig(Config{Name: "test"}) - require.NoError(t, err) - } - - require.Equal(t, 1, spawnedCount) - }) - - t.Run("dynamic update unsuccessful", func(t *testing.T) { - spawnedCount := 0 - spawner := func(c Config) (ManagedInstance, error) { - spawnedCount++ - - newMock := baseMock - newMock.UpdateFunc = func(c Config) error { - return ErrInvalidUpdate{ - Inner: fmt.Errorf("cannot dynamically update for testing reasons"), - } - } - return &newMock, nil - } - - cm := NewBasicManager(DefaultBasicManagerConfig, logger, spawner) - - for i := 0; i < 10; i++ { - err := cm.ApplyConfig(Config{Name: "test"}) - require.NoError(t, err) - } - - require.Equal(t, 10, spawnedCount) - }) - - t.Run("dynamic update errored", func(t *testing.T) { - spawnedCount := 0 - spawner := func(c Config) (ManagedInstance, error) { - spawnedCount++ - - newMock := baseMock - newMock.UpdateFunc = func(c Config) error { - return fmt.Errorf("something really bad happened") - } - return &newMock, nil - } - - cm := NewBasicManager(DefaultBasicManagerConfig, logger, spawner) - - // Creation should succeed - err := cm.ApplyConfig(Config{Name: "test"}) - require.NoError(t, err) - - // ...but the update should fail - err = cm.ApplyConfig(Config{Name: "test"}) - require.Error(t, err, "something really bad happened") - require.Equal(t, 1, spawnedCount) - }) -} - -type mockInstance struct { - RunFunc func(ctx context.Context) error - ReadyFunc func() bool - UpdateFunc func(c Config) error - TargetsActiveFunc func() map[string][]*scrape.Target - StorageDirectoryFunc func() string - AppenderFunc func() storage.Appender - WriteHandlerFunc func() http.Handler -} - -func (m mockInstance) Run(ctx context.Context) error { - if m.RunFunc != nil { - return m.RunFunc(ctx) - } - panic("RunFunc not provided") -} - -func (m mockInstance) Ready() bool { - if m.ReadyFunc != nil { - return m.ReadyFunc() - } - panic("ReadyFunc not provided") -} - -func (m mockInstance) Update(c Config) error { - if m.UpdateFunc != nil { - return m.UpdateFunc(c) - } - panic("UpdateFunc not provided") -} - -func (m mockInstance) TargetsActive() map[string][]*scrape.Target { - if m.TargetsActiveFunc != nil { - return m.TargetsActiveFunc() - } - 
panic("TargetsActiveFunc not provided") -} - -func (m mockInstance) StorageDirectory() string { - if m.StorageDirectoryFunc != nil { - return m.StorageDirectoryFunc() - } - panic("StorageDirectoryFunc not provided") -} - -func (m mockInstance) WriteHandler() http.Handler { - if m.WriteHandlerFunc != nil { - return m.WriteHandlerFunc() - } - panic("GetWriteHandlerFunc not provided") -} - -func (m mockInstance) Appender(_ context.Context) storage.Appender { - if m.AppenderFunc != nil { - return m.AppenderFunc() - } - panic("AppenderFunc not provided") -} diff --git a/internal/static/metrics/instance/modal_manager.go b/internal/static/metrics/instance/modal_manager.go index 18abb4f1ed..7308b351aa 100644 --- a/internal/static/metrics/instance/modal_manager.go +++ b/internal/static/metrics/instance/modal_manager.go @@ -2,12 +2,6 @@ package instance import ( "fmt" - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" ) // Mode controls how instances are created. @@ -42,175 +36,3 @@ func (m *Mode) UnmarshalYAML(unmarshal func(interface{}) error) error { return fmt.Errorf("unsupported instance_mode '%s'. supported values 'shared', 'distinct'", plain) } } - -// ModalManager runs instances by either grouping them or running them fully -// separately. -type ModalManager struct { - mut sync.RWMutex - mode Mode - configs map[string]Config - - changedConfigs *prometheus.CounterVec - currentActiveConfigs prometheus.Gauge - - log log.Logger - - // The ModalManager wraps around a "final" Manager that is intended to - // launch and manage instances based on Configs. This is specified here by the - // "wrapped" Manager. - // - // However, there may be another manager performing formations on the configs - // before they are passed through to wrapped. This is specified by the "active" - // Manager. - // - // If no transformations on Configs are needed, active will be identical to - // wrapped. - wrapped, active Manager -} - -// NewModalManager creates a new ModalManager. -func NewModalManager(reg prometheus.Registerer, l log.Logger, next Manager, mode Mode) (*ModalManager, error) { - changedConfigs := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "agent_metrics_configs_changed_total", - Help: "Total number of dynamically updated configs", - }, []string{"event"}) - currentActiveConfigs := promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Name: "agent_metrics_active_configs", - Help: "Current number of active configs being used by the agent.", - }) - - mm := ModalManager{ - wrapped: next, - log: l, - changedConfigs: changedConfigs, - currentActiveConfigs: currentActiveConfigs, - configs: make(map[string]Config), - } - if err := mm.SetMode(mode); err != nil { - return nil, err - } - return &mm, nil -} - -// SetMode updates the mode ModalManager is running in. Changing the mode is -// an expensive operation; all underlying configs must be stopped and then -// reapplied. -func (m *ModalManager) SetMode(newMode Mode) error { - if newMode == "" { - newMode = DefaultMode - } - - m.mut.Lock() - defer m.mut.Unlock() - - var ( - prevMode = m.mode - prevActive = m.active - ) - - if prevMode == newMode { - return nil - } - - // Set the active Manager based on the new mode. "distinct" means no transformations - // need to be applied and we can use the wrapped Manager directly. Otherwise, we need - // to create a new Manager to apply transformations. 
- switch newMode { - case ModeDistinct: - m.active = m.wrapped - case ModeShared: - m.active = NewGroupManager(m.wrapped) - default: - panic("unknown mode " + m.mode) - } - m.mode = newMode - - // Remove all configs from the previous active Manager. - if prevActive != nil { - prevActive.Stop() - } - - // Re-apply configs to the new active Manager. - var firstError error - for name, cfg := range m.configs { - err := m.active.ApplyConfig(cfg) - if err != nil { - level.Error(m.log).Log("msg", "failed to apply config when changing modes", "name", name, "prev_mode", prevMode, "new_mode", newMode, "err", err) - } - if firstError == nil && err != nil { - firstError = err - } - } - - return firstError -} - -// GetInstance implements Manager. -func (m *ModalManager) GetInstance(name string) (ManagedInstance, error) { - m.mut.RLock() - defer m.mut.RUnlock() - return m.active.GetInstance(name) -} - -// ListInstances implements Manager. -func (m *ModalManager) ListInstances() map[string]ManagedInstance { - m.mut.RLock() - defer m.mut.RUnlock() - return m.active.ListInstances() -} - -// ListConfigs implements Manager. -func (m *ModalManager) ListConfigs() map[string]Config { - m.mut.RLock() - defer m.mut.RUnlock() - return m.active.ListConfigs() -} - -// ApplyConfig implements Manager. -func (m *ModalManager) ApplyConfig(c Config) error { - m.mut.Lock() - defer m.mut.Unlock() - - if err := m.active.ApplyConfig(c); err != nil { - return err - } - - if _, existingConfig := m.configs[c.Name]; !existingConfig { - m.currentActiveConfigs.Inc() - m.changedConfigs.WithLabelValues("created").Inc() - } else { - m.changedConfigs.WithLabelValues("updated").Inc() - } - - m.configs[c.Name] = c - - return nil -} - -// DeleteConfig implements Manager. -func (m *ModalManager) DeleteConfig(name string) error { - m.mut.Lock() - defer m.mut.Unlock() - - if err := m.active.DeleteConfig(name); err != nil { - return err - } - - if _, existingConfig := m.configs[name]; existingConfig { - m.currentActiveConfigs.Dec() - delete(m.configs, name) - } - - m.changedConfigs.WithLabelValues("deleted").Inc() - return nil -} - -// Stop implements Manager. -func (m *ModalManager) Stop() { - m.mut.Lock() - defer m.mut.Unlock() - - m.active.Stop() - m.currentActiveConfigs.Set(0) - m.configs = make(map[string]Config) -} diff --git a/internal/static/metrics/instance/noop.go b/internal/static/metrics/instance/noop.go deleted file mode 100644 index f9f86b8713..0000000000 --- a/internal/static/metrics/instance/noop.go +++ /dev/null @@ -1,49 +0,0 @@ -package instance - -import ( - "context" - "net/http" - - "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" -) - -// NoOpInstance implements the Instance interface in pkg/prom -// but does not do anything. Useful for tests. -type NoOpInstance struct{} - -// Run implements Instance. -func (NoOpInstance) Run(ctx context.Context) error { - <-ctx.Done() - return nil -} - -// Ready implements Instance. -func (NoOpInstance) Ready() bool { - return true -} - -// Update implements Instance. -func (NoOpInstance) Update(_ Config) error { - return nil -} - -// TargetsActive implements Instance. -func (NoOpInstance) TargetsActive() map[string][]*scrape.Target { - return nil -} - -// StorageDirectory implements Instance. -func (NoOpInstance) StorageDirectory() string { - return "" -} - -// WriteHandler implements Instance. 
-func (NoOpInstance) WriteHandler() http.Handler { - return nil -} - -// Appender implements Instance. -func (NoOpInstance) Appender(_ context.Context) storage.Appender { - return nil -} diff --git a/internal/static/server/logger.go b/internal/static/server/logger.go deleted file mode 100644 index 0068775ac2..0000000000 --- a/internal/static/server/logger.go +++ /dev/null @@ -1,118 +0,0 @@ -package server - -import ( - "sync" - - "github.com/go-kit/log" - util_log "github.com/grafana/agent/internal/util/log" - dskit "github.com/grafana/dskit/log" -) - -// Logger implements Go Kit's log.Logger interface. It supports being -// dynamically updated at runtime. -type Logger struct { - // mut protects against race conditions accessing l, which can be modified - // and accessed concurrently if ApplyConfig and Log are called at the same - // time. - mut sync.RWMutex - l log.Logger - - // HookLogger is used to temporarily hijack logs for support bundles. - HookLogger HookLogger - - // makeLogger will default to defaultLogger. It's a struct - // member to make testing work properly. - makeLogger func(*Config) (log.Logger, error) -} - -// HookLogger is used to temporarily redirect logs to a secondary logger. -type HookLogger struct { - mut sync.RWMutex - enabled bool - logger log.Logger -} - -// NewLogger creates a new Logger. -func NewLogger(cfg *Config) *Logger { - return newLogger(cfg, defaultLogger) -} - -// NewLoggerFromLevel creates a new logger from logging.Level and logging.Format. -func NewLoggerFromLevel(lvl dskit.Level, fmt string) *Logger { - logger, err := makeDefaultLogger(lvl, fmt) - if err != nil { - panic(err) - } - return &Logger{ - l: logger, - } -} - -func newLogger(cfg *Config, ctor func(*Config) (log.Logger, error)) *Logger { - l := Logger{makeLogger: ctor} - if err := l.ApplyConfig(cfg); err != nil { - panic(err) - } - return &l -} - -// ApplyConfig applies configuration changes to the logger. -func (l *Logger) ApplyConfig(cfg *Config) error { - l.mut.Lock() - defer l.mut.Unlock() - - newLogger, err := l.makeLogger(cfg) - if err != nil { - return err - } - - l.l = newLogger - return nil -} - -func defaultLogger(cfg *Config) (log.Logger, error) { - return makeDefaultLogger(cfg.LogLevel.Level, cfg.LogFormat) -} - -func makeDefaultLogger(lvl dskit.Level, fmt string) (log.Logger, error) { - var l log.Logger - - l, err := util_log.NewPrometheusLogger(lvl, fmt) - if err != nil { - return nil, err - } - - // There are two wrappers on the log so skip two extra stacks vs default - return log.With(l, "caller", log.Caller(5)), nil -} - -// Log logs a log line. -func (l *Logger) Log(kvps ...interface{}) error { - l.mut.RLock() - defer l.mut.RUnlock() - err := l.HookLogger.Log(kvps...) - if err != nil { - return err - } - return l.l.Log(kvps...) -} - -// Log implements log.Logger. -func (hl *HookLogger) Log(kvps ...interface{}) error { - hl.mut.RLock() - defer hl.mut.RUnlock() - if hl.enabled { - return hl.logger.Log(kvps...) - } - return nil -} - -// Set where HookLogger should tee logs to. -// If a nil logger is passed, the HookLogger is disabled.
-func (hl *HookLogger) Set(l log.Logger) { - hl.mut.Lock() - defer hl.mut.Unlock() - - hl.enabled = l != nil - hl.logger = l -} diff --git a/internal/static/server/logger_test.go b/internal/static/server/logger_test.go deleted file mode 100644 index 083933e495..0000000000 --- a/internal/static/server/logger_test.go +++ /dev/null @@ -1,58 +0,0 @@ -package server - -import ( - "bytes" - "testing" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/stretchr/testify/require" - "gopkg.in/yaml.v2" -) - -func TestLogger_DefaultParameters(t *testing.T) { - makeLogger := func(cfg *Config) (log.Logger, error) { - var l log.Logger - require.Equal(t, "info", cfg.LogLevel.String()) - require.Equal(t, "logfmt", cfg.LogFormat) - return l, nil - } - defaultCfg := DefaultConfig() - newLogger(&defaultCfg, makeLogger).makeLogger(&defaultCfg) -} - -func TestLogger_ApplyConfig(t *testing.T) { - var buf bytes.Buffer - makeLogger := func(cfg *Config) (log.Logger, error) { - l := log.NewLogfmtLogger(log.NewSyncWriter(&buf)) - if cfg.LogFormat == "json" { - l = log.NewJSONLogger(log.NewSyncWriter(&buf)) - } - l = level.NewFilter(l, cfg.LogLevel.Level.Option) - return l, nil - } - - var cfg Config - cfgText := `log_level: error` - - err := yaml.Unmarshal([]byte(cfgText), &cfg) - require.NoError(t, err) - - l := newLogger(&cfg, makeLogger) - level.Debug(l).Log("msg", "this should not appear") - - cfgText = ` -log_level: debug -log_format: json` - err = yaml.Unmarshal([]byte(cfgText), &cfg) - require.NoError(t, err) - - err = l.ApplyConfig(&cfg) - require.NoError(t, err) - - level.Debug(l).Log("msg", "this should appear") - require.JSONEq(t, `{ - "level":"debug", - "msg":"this should appear" - }`, buf.String()) -} diff --git a/internal/static/server/logger_windows.go b/internal/static/server/logger_windows.go deleted file mode 100644 index c84bd0888c..0000000000 --- a/internal/static/server/logger_windows.go +++ /dev/null @@ -1,110 +0,0 @@ -package server - -import ( - "runtime" - "strings" - - "github.com/go-kit/log/level" - - "github.com/go-kit/log" - el "golang.org/x/sys/windows/svc/eventlog" -) - -// Default name for the Grafana Agent under Windows -const ServiceName = "Grafana Agent" - -// NewWindowsEventLogger creates a new logger that writes to the event log -func NewWindowsEventLogger(cfg *Config) *Logger { - return newLogger(cfg, makeWindowsEventLogger) -} - -func makeWindowsEventLogger(cfg *Config) (log.Logger, error) { - // Set up the log in windows events - err := el.InstallAsEventCreate(ServiceName, el.Error|el.Info|el.Warning) - - // Agent should expect an error of 'already exists' if the Event Log sink has already previously been installed - if err != nil && !strings.Contains(err.Error(), "already exists") { - return nil, err - } - il, err := el.Open(ServiceName) - if err != nil { - return nil, err - } - - // Ensure the logger gets closed when the GC runs. It's valid to have more than one win logger open concurrently. 
- runtime.SetFinalizer(il, func(l *el.Log) { - l.Close() - }) - - // These are set up to be writers for each Windows log level - // Set up this way so we can utilize all the benefits of logformatter - infoLogger := newWinLogWrapper(cfg.LogFormat, func(p []byte) error { - return il.Info(1, string(p)) - }) - warningLogger := newWinLogWrapper(cfg.LogFormat, func(p []byte) error { - return il.Warning(1, string(p)) - }) - - errorLogger := newWinLogWrapper(cfg.LogFormat, func(p []byte) error { - return il.Error(1, string(p)) - }) - - wl := &winLogger{ - errorLogger: errorLogger, - infoLogger: infoLogger, - warningLogger: warningLogger, - } - return level.NewFilter(wl, cfg.LogLevel.Level.Option), nil -} - -// Looks through the key value pairs in the log for level and extract the value -func getLevel(keyvals ...interface{}) level.Value { - for i := 0; i < len(keyvals); i++ { - if vo, ok := keyvals[i].(level.Value); ok { - return vo - } - } - return nil -} - -func newWinLogWrapper(format string, write func(p []byte) error) log.Logger { - infoWriter := &winLogWriter{writer: write} - infoLogger := log.NewLogfmtLogger(infoWriter) - if format == "json" { - infoLogger = log.NewJSONLogger(infoWriter) - } - return infoLogger -} - -type winLogger struct { - errorLogger log.Logger - infoLogger log.Logger - warningLogger log.Logger -} - -func (w *winLogger) Log(keyvals ...interface{}) error { - lvl := getLevel(keyvals...) - // 3 different loggers are used so that agent can utilize the formatting features of go-kit logging - // if agent did not use this then the windows logger uses different function calls for different levels - // this is paired with the fact that the io.Writer interface only gives a byte array. - switch lvl { - case level.DebugValue(): - return w.infoLogger.Log(keyvals...) - case level.InfoValue(): - return w.infoLogger.Log(keyvals...) - case level.WarnValue(): - return w.warningLogger.Log(keyvals...) - case level.ErrorValue(): - return w.errorLogger.Log(keyvals...) - default: - return w.infoLogger.Log(keyvals...) - } -} - -type winLogWriter struct { - writer func(p []byte) error -} - -func (i *winLogWriter) Write(p []byte) (n int, err error) { - return len(p), i.writer(p) -} diff --git a/internal/static/server/server.go b/internal/static/server/server.go index 26f6210003..adae8c6a30 100644 --- a/internal/static/server/server.go +++ b/internal/static/server/server.go @@ -6,438 +6,9 @@ package server import ( "context" - "errors" - "fmt" "net" - "net/http" - _ "net/http/pprof" // anonymous import to get the pprof handler registered - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/gorilla/mux" - "github.com/grafana/ckit/memconn" - "github.com/grafana/dskit/middleware" - _ "github.com/grafana/pyroscope-go/godeltaprof/http/pprof" // anonymous import to get the godeltaprof handler registered - grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware" - "github.com/hashicorp/go-multierror" - "github.com/oklog/run" - otgrpc "github.com/opentracing-contrib/go-grpc" - "github.com/opentracing/opentracing-go" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - "golang.org/x/net/netutil" - "google.golang.org/grpc" - "google.golang.org/grpc/keepalive" ) // DialContextFunc is a function matching the signature of // net.Dialer.DialContext. type DialContextFunc func(ctx context.Context, network string, addr string) (net.Conn, error) - -// Server wraps an HTTP and gRPC server with some common initialization. 
-// -// Unless instrumentation is disabled in the Servers config, Prometheus metrics -// will be automatically generated for the server. -type Server struct { - flagsMut sync.Mutex - flags Flags - - // Listeners for in-memory connections. These never use TLS. - httpMemListener *memconn.Listener - grpcMemListener *memconn.Listener - - // Listeners to use for connections. These will use TLS when TLS is enabled. - httpListener net.Listener - grpcListener net.Listener - - updateHTTPTLS func(TLSConfig) error - updateGRPCTLS func(TLSConfig) error - - HTTP *mux.Router - HTTPServer *http.Server - GRPC *grpc.Server - - // DialContext creates a connection to the given network/address. If address - // matches the Server's internal HTTP or gRPC address, an internal in-memory - // connection will be opened. - DialContext DialContextFunc -} - -type metrics struct { - tcpConnections *prometheus.GaugeVec - tcpConnectionsLimit *prometheus.GaugeVec - requestDuration *prometheus.HistogramVec - receivedMessageSize *prometheus.HistogramVec - sentMessageSize *prometheus.HistogramVec - inflightRequests *prometheus.GaugeVec -} - -func newMetrics(r prometheus.Registerer) (*metrics, error) { - var m metrics - - // Create metrics for the server - m.tcpConnections = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "agent_tcp_connections", - Help: "Current number of accepted TCP connections.", - }, []string{"protocol"}) - m.tcpConnectionsLimit = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "agent_tcp_connections_limit", - Help: "The maximum number of TCP connections that can be accepted (0 = unlimited)", - }, []string{"protocol"}) - m.requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "agent_request_duration_seconds", - Help: "Time in seconds spent serving HTTP requests.", - }, []string{"method", "route", "status_code", "ws"}) - m.receivedMessageSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "agent_request_message_bytes", - Help: "Size (in bytes) of messages received in the request.", - Buckets: middleware.BodySizeBuckets, - }, []string{"method", "route"}) - m.sentMessageSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "agent_response_message_bytes", - Help: "Size (in bytes) of messages sent in response.", - Buckets: middleware.BodySizeBuckets, - }, []string{"method", "route"}) - m.inflightRequests = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "agent_inflight_requests", - Help: "Current number of inflight requests.", - }, []string{"method", "route"}) - - if r != nil { - // Register all of our metrics - cc := []prometheus.Collector{ - m.tcpConnections, m.tcpConnectionsLimit, m.requestDuration, m.receivedMessageSize, - m.sentMessageSize, m.inflightRequests, - } - for _, c := range cc { - if err := r.Register(c); err != nil { - return nil, fmt.Errorf("failed registering server metrics: %w", err) - } - } - } - return &m, nil -} - -// New creates a new Server with the given config. -// -// r is used to register Server-specific metrics. If r is nil, no metrics will -// be registered. -// -// g is used for collecting metrics from the instrumentation handlers, when -// enabled. If g is nil, a /metrics endpoint will not be registered. 
-func New(l log.Logger, r prometheus.Registerer, g prometheus.Gatherer, cfg Config, flags Flags) (srv *Server, err error) { - if l == nil { - l = log.NewNopLogger() - } - - switch { - case flags.HTTP.InMemoryAddr == "": - return nil, fmt.Errorf("in memory HTTP address must be configured") - case flags.GRPC.InMemoryAddr == "": - return nil, fmt.Errorf("in memory gRPC address must be configured") - case flags.HTTP.InMemoryAddr == flags.GRPC.InMemoryAddr: - return nil, fmt.Errorf("in memory HTTP and gRPC address must be different") - } - - m, err := newMetrics(r) - if err != nil { - return nil, err - } - - // Create listeners first so we can fail early if the port is in use. - httpListener, err := newHTTPListener(&flags.HTTP, m) - if err != nil { - return nil, err - } - defer func() { - if err != nil { - _ = httpListener.Close() - } - }() - grpcListener, err := newGRPCListener(&flags.GRPC, m) - if err != nil { - return nil, err - } - defer func() { - if err != nil { - _ = httpListener.Close() - } - }() - - // Configure TLS - var ( - updateHTTPTLS func(TLSConfig) error - updateGRPCTLS func(TLSConfig) error - ) - if flags.HTTP.UseTLS { - httpTLSListener, err := newTLSListener(httpListener, cfg.HTTP.TLSConfig, l) - if err != nil { - return nil, fmt.Errorf("generating HTTP TLS config: %w", err) - } - httpListener = httpTLSListener - updateHTTPTLS = httpTLSListener.ApplyConfig - } - if flags.GRPC.UseTLS { - grpcTLSListener, err := newTLSListener(grpcListener, cfg.GRPC.TLSConfig, l) - if err != nil { - return nil, fmt.Errorf("generating GRPC TLS config: %w", err) - } - grpcListener = grpcTLSListener - updateGRPCTLS = grpcTLSListener.ApplyConfig - } - - level.Info(l).Log( - "msg", "server listening on addresses", - "http", httpListener.Addr(), "grpc", grpcListener.Addr(), - "http_tls_enabled", flags.HTTP.UseTLS, "grpc_tls_enabled", flags.GRPC.UseTLS, - ) - - // Build servers - grpcServer := newGRPCServer(l, &flags.GRPC, m) - httpServer, router, err := newHTTPServer(l, g, &flags, m) - if err != nil { - return nil, err - } - - // Build in-memory listeners and dial function - var ( - httpMemListener = memconn.NewListener(nil) - grpcMemListener = memconn.NewListener(nil) - ) - dialFunc := func(ctx context.Context, network string, address string) (net.Conn, error) { - switch address { - case flags.HTTP.InMemoryAddr: - return httpMemListener.DialContext(ctx) - case flags.GRPC.InMemoryAddr: - return grpcMemListener.DialContext(ctx) - default: - return (&net.Dialer{}).DialContext(ctx, network, address) - } - } - - return &Server{ - flags: flags, - httpListener: httpListener, - grpcListener: grpcListener, - httpMemListener: httpMemListener, - grpcMemListener: grpcMemListener, - - updateHTTPTLS: updateHTTPTLS, - updateGRPCTLS: updateGRPCTLS, - - HTTP: router, - HTTPServer: httpServer, - GRPC: grpcServer, - DialContext: dialFunc, - }, nil -} - -func newHTTPListener(opts *HTTPFlags, m *metrics) (net.Listener, error) { - httpAddress := opts.ListenAddress - if httpAddress == "" { - return nil, fmt.Errorf("http address not set") - } - httpListener, err := net.Listen(opts.ListenNetwork, httpAddress) - if err != nil { - return nil, fmt.Errorf("creating HTTP listener: %w", err) - } - httpListener = middleware.CountingListener(httpListener, m.tcpConnections.WithLabelValues("http")) - - m.tcpConnectionsLimit.WithLabelValues("http").Set(float64(opts.ConnLimit)) - if opts.ConnLimit > 0 { - httpListener = netutil.LimitListener(httpListener, opts.ConnLimit) - } - return httpListener, nil -} - -func newGRPCListener(opts 
*GRPCFlags, m *metrics) (net.Listener, error) { - grpcAddress := opts.ListenAddress - if grpcAddress == "" { - return nil, fmt.Errorf("gRPC address not set") - } - grpcListener, err := net.Listen(opts.ListenNetwork, grpcAddress) - if err != nil { - return nil, fmt.Errorf("creating gRPC listener: %w", err) - } - grpcListener = middleware.CountingListener(grpcListener, m.tcpConnections.WithLabelValues("grpc")) - - m.tcpConnectionsLimit.WithLabelValues("grpc").Set(float64(opts.ConnLimit)) - if opts.ConnLimit > 0 { - grpcListener = netutil.LimitListener(grpcListener, opts.ConnLimit) - } - return grpcListener, nil -} - -func newGRPCServer(l log.Logger, opts *GRPCFlags, m *metrics) *grpc.Server { - serverLog := middleware.GRPCServerLog{ - WithRequest: true, - Log: l, - } - grpcOptions := []grpc.ServerOption{ - grpc.UnaryInterceptor(grpc_middleware.ChainUnaryServer( - serverLog.UnaryServerInterceptor, - otgrpc.OpenTracingServerInterceptor(opentracing.GlobalTracer()), - middleware.UnaryServerInstrumentInterceptor(m.requestDuration), - )), - grpc.StreamInterceptor(grpc_middleware.ChainStreamServer( - serverLog.StreamServerInterceptor, - otgrpc.OpenTracingStreamServerInterceptor(opentracing.GlobalTracer()), - middleware.StreamServerInstrumentInterceptor(m.requestDuration), - )), - grpc.KeepaliveParams(keepalive.ServerParameters{ - MaxConnectionIdle: opts.MaxConnectionIdle, - MaxConnectionAge: opts.MaxConnectionAge, - MaxConnectionAgeGrace: opts.MaxConnectionAgeGrace, - Time: opts.KeepaliveTime, - Timeout: opts.KeepaliveTimeout, - }), - grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ - MinTime: opts.MinTimeBetweenPings, - PermitWithoutStream: opts.PingWithoutStreamAllowed, - }), - grpc.MaxRecvMsgSize(opts.MaxRecvMsgSize), - grpc.MaxSendMsgSize(opts.MaxSendMsgSize), - grpc.MaxConcurrentStreams(uint32(opts.MaxConcurrentStreams)), - grpc.StatsHandler(middleware.NewStatsHandler(m.receivedMessageSize, m.sentMessageSize, m.inflightRequests)), - } - - return grpc.NewServer(grpcOptions...) -} - -func newHTTPServer(l log.Logger, g prometheus.Gatherer, opts *Flags, m *metrics) (*http.Server, *mux.Router, error) { - router := mux.NewRouter() - if opts.RegisterInstrumentation && g != nil { - router.Handle("/metrics", promhttp.HandlerFor(g, promhttp.HandlerOpts{ - EnableOpenMetrics: true, - })) - router.PathPrefix("/debug/pprof").Handler(http.DefaultServeMux) - } - - var sourceIPs *middleware.SourceIPExtractor - if opts.LogSourceIPs { - var err error - sourceIPs, err = middleware.NewSourceIPs(opts.LogSourceIPsHeader, opts.LogSourceIPsRegex) - if err != nil { - return nil, nil, fmt.Errorf("error setting up source IP extraction: %v", err) - } - } - - httpMiddleware := []middleware.Interface{ - middleware.Tracer{ - RouteMatcher: router, - SourceIPs: sourceIPs, - }, - middleware.Log{ - Log: l, - SourceIPs: sourceIPs, - }, - middleware.Instrument{ - RouteMatcher: router, - Duration: m.requestDuration, - RequestBodySize: m.receivedMessageSize, - ResponseBodySize: m.sentMessageSize, - InflightRequests: m.inflightRequests, - }, - } - - httpServer := &http.Server{ - ReadTimeout: opts.HTTP.ReadTimeout, - WriteTimeout: opts.HTTP.WriteTimeout, - IdleTimeout: opts.HTTP.IdleTimeout, - Handler: middleware.Merge(httpMiddleware...).Wrap(router), - } - - return httpServer, router, nil -} - -// HTTPAddress returns the HTTP net.Addr of this Server. -func (s *Server) HTTPAddress() net.Addr { return s.httpListener.Addr() } - -// GRPCAddress returns the GRPC net.Addr of this Server. 
-func (s *Server) GRPCAddress() net.Addr { return s.grpcListener.Addr() } - -// ApplyConfig applies changes to the Server block. -func (s *Server) ApplyConfig(cfg Config) error { - s.flagsMut.Lock() - defer s.flagsMut.Unlock() - - // N.B. LogLevel/LogFormat support dynamic updating but are never used in - // *Server, so they're ignored here. - - if s.updateHTTPTLS != nil { - if err := s.updateHTTPTLS(cfg.HTTP.TLSConfig); err != nil { - return fmt.Errorf("updating HTTP TLS settings: %w", err) - } - } - if s.updateGRPCTLS != nil { - if err := s.updateGRPCTLS(cfg.GRPC.TLSConfig); err != nil { - return fmt.Errorf("updating gRPC TLS settings: %w", err) - } - } - - return nil -} - -// Run the server until an error is received or the given context is canceled. -// Run may not be re-called after it exits. -func (s *Server) Run(ctx context.Context) error { - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - var g run.Group - - g.Add(func() error { - <-ctx.Done() - return nil - }, func(_ error) { - cancel() - }) - - httpListeners := []net.Listener{ - s.httpListener, - s.httpMemListener, - } - for i := range httpListeners { - listener := httpListeners[i] - g.Add(func() error { - err := s.HTTPServer.Serve(listener) - if errors.Is(err, http.ErrServerClosed) { - err = nil - } - return err - }, func(_ error) { - ctx, cancel := context.WithTimeout(context.Background(), s.flags.GracefulShutdownTimeout) - defer cancel() - _ = s.HTTPServer.Shutdown(ctx) - }) - } - - grpcListeners := []net.Listener{ - s.grpcListener, - s.grpcMemListener, - } - for i := range grpcListeners { - listener := grpcListeners[i] - g.Add(func() error { - err := s.GRPC.Serve(listener) - if errors.Is(err, grpc.ErrServerStopped) { - err = nil - } - return err - }, func(_ error) { - s.GRPC.GracefulStop() - }) - } - - return g.Run() -} - -// Close forcibly closes the server's listeners.
-func (s *Server) Close() error { - errs := multierror.Append( - s.httpListener.Close(), - s.grpcListener.Close(), - ) - return errs.ErrorOrNil() -} diff --git a/internal/static/server/server_test.go b/internal/static/server/server_test.go deleted file mode 100644 index effa46b72e..0000000000 --- a/internal/static/server/server_test.go +++ /dev/null @@ -1,193 +0,0 @@ -package server - -import ( - "context" - "crypto/tls" - "fmt" - "net" - "net/http" - "testing" - - "github.com/go-kit/log" - "github.com/stretchr/testify/require" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials" - "google.golang.org/grpc/credentials/insecure" - "google.golang.org/grpc/health" - "google.golang.org/grpc/health/grpc_health_v1" -) - -const anyLocalhost = "127.0.0.1:0" - -func TestServer(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - srv := runExampleServer(t, cfg, flags) - - // Validate HTTP - resp, err := http.Get(fmt.Sprintf("http://%s/testing", srv.HTTPAddress())) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - _ = resp.Body.Close() - - // Validate gRPC - creds := grpc.WithTransportCredentials(insecure.NewCredentials()) - cc, err := grpc.Dial(srv.GRPCAddress().String(), creds) - require.NoError(t, err) - _, err = grpc_health_v1.NewHealthClient(cc).Check(context.Background(), &grpc_health_v1.HealthCheckRequest{}) - require.NoError(t, err) -} - -func TestServer_InMemory(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - srv := runExampleServer(t, cfg, flags) - - // Validate HTTP - var httpClient http.Client - httpClient.Transport = &http.Transport{DialContext: srv.DialContext} - resp, err := httpClient.Get(fmt.Sprintf("http://%s/testing", flags.HTTP.InMemoryAddr)) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - _ = resp.Body.Close() - - // Validate gRPC - grpcDialer := grpc.WithContextDialer(func(ctx context.Context, s string) (net.Conn, error) { - return srv.DialContext(ctx, "", s) - }) - cc, err := grpc.Dial(flags.GRPC.InMemoryAddr, grpc.WithTransportCredentials(insecure.NewCredentials()), grpcDialer) - require.NoError(t, err) - _, err = grpc_health_v1.NewHealthClient(cc).Check(context.Background(), &grpc_health_v1.HealthCheckRequest{}) - require.NoError(t, err) -} - -func newTestConfig() Config { - cfg := DefaultConfig() - return cfg -} - -func newTestFlags() Flags { - flags := DefaultFlags - flags.HTTP.ListenAddress = anyLocalhost - flags.GRPC.ListenAddress = anyLocalhost - return flags -} - -func runExampleServer(t *testing.T, cfg Config, flags Flags) *Server { - t.Helper() - - srv, err := New(log.NewNopLogger(), nil, nil, cfg, flags) - require.NoError(t, err) - - // Set up some expected services for us to test against. - srv.HTTP.HandleFunc("/testing", func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - }) - grpc_health_v1.RegisterHealthServer(srv.GRPC, health.NewServer()) - - // Run our server. 
- ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - go func() { - require.NoError(t, srv.Run(ctx)) - }() - - return srv -} - -func TestServer_TLS(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - - flags.HTTP.UseTLS = true - flags.GRPC.UseTLS = true - - tlsConfig := TLSConfig{ - TLSCertPath: "testdata/example-cert.pem", - TLSKeyPath: "testdata/example-key.pem", - } - cfg.HTTP.TLSConfig = tlsConfig - cfg.GRPC.TLSConfig = tlsConfig - - srv := runExampleServer(t, cfg, flags) - - // Validate HTTPS - cli := http.Client{ - Transport: &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - }, - } - resp, err := cli.Get(fmt.Sprintf("https://%s/testing", srv.HTTPAddress())) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - _ = resp.Body.Close() - - // Validate gRPC TLS - creds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true}) - cc, err := grpc.Dial(srv.GRPCAddress().String(), grpc.WithTransportCredentials(creds)) - require.NoError(t, err) - _, err = grpc_health_v1.NewHealthClient(cc).Check(context.Background(), &grpc_health_v1.HealthCheckRequest{}) - require.NoError(t, err) -} - -// TestRunReturnsError validates that Run exits with an error when the -// HTTP/GRPC servers stop unexpectedly. -func TestRunReturnsError(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - - t.Run("http", func(t *testing.T) { - srv, err := New(nil, nil, nil, cfg, flags) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - errChan := make(chan error, 1) - go func() { - errChan <- srv.Run(ctx) - }() - - require.NoError(t, srv.httpListener.Close()) - require.NotNil(t, <-errChan) - }) - - t.Run("grpc", func(t *testing.T) { - srv, err := New(nil, nil, nil, cfg, flags) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - errChan := make(chan error, 1) - go func() { - errChan <- srv.Run(ctx) - }() - - require.NoError(t, srv.grpcListener.Close()) - require.NotNil(t, <-errChan) - }) -} - -func TestServer_ApplyConfig(t *testing.T) { - t.Run("no changes", func(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - - srv, err := New(nil, nil, nil, cfg, flags) - require.NoError(t, err) - - require.NoError(t, srv.ApplyConfig(cfg)) - }) - - t.Run("valid changes", func(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - - srv, err := New(nil, nil, nil, cfg, flags) - require.NoError(t, err) - - cfg.LogLevel.Set("debug") - require.NoError(t, srv.ApplyConfig(cfg)) - }) -} diff --git a/internal/static/server/signal_context.go b/internal/static/server/signal_context.go deleted file mode 100644 index 21ac6376ce..0000000000 --- a/internal/static/server/signal_context.go +++ /dev/null @@ -1,41 +0,0 @@ -package server - -import ( - "context" - - "github.com/go-kit/log" - "github.com/grafana/dskit/signals" - "go.uber.org/atomic" -) - -var signalContexts atomic.Int64 - -// SignalContext wraps a ctx which will be canceled if an interrupt is -// received. -// -// It is invalid to have two simultaneous SignalContexts per binary. 
-func SignalContext(ctx context.Context, l log.Logger) (context.Context, context.CancelFunc) { - if !signalContexts.CompareAndSwap(0, 1) { - panic("bug: multiple SignalContexts found") - } - - if l == nil { - l = log.NewNopLogger() - } - - ctx, cancel := context.WithCancel(ctx) - - handler := signals.NewHandler(l) - go func() { - handler.Loop() - signalContexts.Store(0) - cancel() - }() - go func() { - <-ctx.Done() - handler.Stop() - signalContexts.Store(0) - }() - - return ctx, cancel -} diff --git a/internal/static/server/tls.go b/internal/static/server/tls.go index 0e78edac58..5ab671f820 100644 --- a/internal/static/server/tls.go +++ b/internal/static/server/tls.go @@ -2,15 +2,9 @@ package server import ( "crypto/tls" - "crypto/x509" "errors" "fmt" - "net" - "os" - "sync" "time" - - "github.com/go-kit/log" ) // TLSConfig holds dynamic configuration options for TLS. @@ -142,152 +136,6 @@ func (tv *TLSVersion) MarshalYAML() (interface{}, error) { return fmt.Sprintf("%v", tv), nil } -// tlsListener is a net.Listener for establishing TLS connections. tlsListener -// supports dynamically updating the TLS settings used to establish -// connections. -type tlsListener struct { - mut sync.RWMutex - cfg TLSConfig - tlsConfig *tls.Config - log log.Logger - - innerListener net.Listener - - windowsCertHandler *WinCertStoreHandler -} - -// newTLSListener creates and configures a new tlsListener. -func newTLSListener(inner net.Listener, c TLSConfig, log log.Logger) (*tlsListener, error) { - tl := &tlsListener{ - innerListener: inner, - log: log, - } - return tl, tl.ApplyConfig(c) -} - -// Accept implements net.Listener and returns the next connection. Connections -// are wrapped with the listener's current TLS settings. -func (l *tlsListener) Accept() (net.Conn, error) { - nc, err := l.innerListener.Accept() - if err != nil { - return nc, err - } - - l.mut.RLock() - defer l.mut.RUnlock() - return tls.Server(nc, l.tlsConfig), nil -} - -// Close implements net.Listener and closes the tlsListener, preventing any new -// connections from being formed. Existing connections will be kept alive. -func (l *tlsListener) Close() error { - if l.windowsCertHandler != nil { - l.windowsCertHandler.Stop() - } - return l.innerListener.Close() -} - -// Addr implements net.Listener and returns the listener's network address. -func (l *tlsListener) Addr() net.Addr { - return l.innerListener.Addr() -} - -// ApplyConfig updates the tlsListener with new settings for creating TLS -// connections. -// -// Existing TLS connections will be kept alive after updating the TLS settings. -// New connections cannot be established while ApplyConfig is running. -func (l *tlsListener) ApplyConfig(c TLSConfig) error { - l.mut.Lock() - defer l.mut.Unlock() - if c.WindowsCertificateFilter != nil { - return l.applyWindowsCertificateStore(c) - } - return l.applyNormalTLS(c) -} - -func (l *tlsListener) applyNormalTLS(c TLSConfig) error { - if l.windowsCertHandler != nil { - panic("windows certificate handler is set; this should never happen") - } - // Convert our TLSConfig into a new *tls.Config. - // - // While *tls.Config supports callbacks and doesn't need to be fully - // replaced, some of our dynamic settings from TLSConfig can't be dynamically - // updated (e.g., ciphers, min/max version, etc.). - // - // To make life easier on ourselves we just replace the whole thing with a new *tls.Config.
- - // Make sure that the certificates exist - if c.TLSCertPath == "" { - return fmt.Errorf("missing certificate file") - } - if c.TLSKeyPath == "" { - return fmt.Errorf("missing key file") - } - _, err := tls.LoadX509KeyPair(c.TLSCertPath, c.TLSKeyPath) - if err != nil { - return fmt.Errorf("failed to load key pair: %w", err) - } - - newConfig := &tls.Config{ - MinVersion: (uint16)(c.MinVersion), - MaxVersion: (uint16)(c.MaxVersion), - PreferServerCipherSuites: c.PreferServerCipherSuites, - - GetCertificate: l.getCertificate, - } - - var cf []uint16 - for _, c := range c.CipherSuites { - cf = append(cf, (uint16)(c)) - } - if len(cf) > 0 { - newConfig.CipherSuites = cf - } - - var cp []tls.CurveID - for _, c := range c.CurvePreferences { - cp = append(cp, (tls.CurveID)(c)) - } - if len(cp) > 0 { - newConfig.CurvePreferences = cp - } - - if c.ClientCAs != "" { - clientCAPool := x509.NewCertPool() - clientCAFile, err := os.ReadFile(c.ClientCAs) - if err != nil { - return err - } - clientCAPool.AppendCertsFromPEM(clientCAFile) - newConfig.ClientCAs = clientCAPool - } - - clientAuth, err := GetClientAuthFromString(c.ClientAuth) - if err != nil { - return err - } - newConfig.ClientAuth = clientAuth - if c.ClientCAs != "" && newConfig.ClientAuth == tls.NoClientCert { - return fmt.Errorf("Client CAs have been configured without a ClientAuth policy") - } - - l.tlsConfig = newConfig - l.cfg = c - return nil -} - -func (l *tlsListener) getCertificate(*tls.ClientHelloInfo) (*tls.Certificate, error) { - l.mut.RLock() - defer l.mut.RUnlock() - - cert, err := tls.LoadX509KeyPair(l.cfg.TLSCertPath, l.cfg.TLSKeyPath) - if err != nil { - return nil, fmt.Errorf("failed to load key pair: %w", err) - } - return &cert, nil -} - func GetClientAuthFromString(clientAuth string) (tls.ClientAuthType, error) { switch clientAuth { case "RequestClientCert": diff --git a/internal/static/server/tls_certstore_stub.go b/internal/static/server/tls_certstore_stub.go index 6b6dd8430b..ba1defe3d1 100644 --- a/internal/static/server/tls_certstore_stub.go +++ b/internal/static/server/tls_certstore_stub.go @@ -2,12 +2,6 @@ package server -import "fmt" - -func (l *tlsListener) applyWindowsCertificateStore(_ TLSConfig) error { - return fmt.Errorf("cannot use Windows certificate store on non-Windows platforms") -} - type WinCertStoreHandler struct { } diff --git a/internal/static/server/tls_certstore_windows.go b/internal/static/server/tls_certstore_windows.go index c80406114b..fca1965748 100644 --- a/internal/static/server/tls_certstore_windows.go +++ b/internal/static/server/tls_certstore_windows.go @@ -59,66 +59,6 @@ func NewWinCertStoreHandler(cfg WindowsCertificateFilter, clientAuth tls.ClientA return cn, nil } -func (l *tlsListener) applyWindowsCertificateStore(c TLSConfig) error { - - // Restrict normal TLS options when using windows certificate store - if c.TLSCertPath != "" { - return fmt.Errorf("at most one of cert_file and windows_certificate_filter can be configured") - } - if c.TLSKeyPath != "" { - return fmt.Errorf("at most one of cert_key and windows_certificate_filter can be configured") - } - if c.WindowsCertificateFilter.Server == nil { - return fmt.Errorf("windows certificate filter requires a server block defined") - } - - var subjectRegEx *regexp.Regexp - var err error - if c.WindowsCertificateFilter.Client != nil && c.WindowsCertificateFilter.Client.SubjectRegEx != "" { - subjectRegEx, err = regexp.Compile(c.WindowsCertificateFilter.Client.SubjectRegEx) - if err != nil { - return fmt.Errorf("error compiling 
subject common name regular expression: %w", err) - } - } - - // If there is an existing windows cert handler, stop it. - if l.windowsCertHandler != nil { - l.windowsCertHandler.Stop() - } - - cn := &WinCertStoreHandler{ - cfg: *c.WindowsCertificateFilter, - subjectRegEx: subjectRegEx, - log: l.log, - shutdown: make(chan struct{}), - } - - err = cn.refreshCerts() - if err != nil { - return err - } - - config := &tls.Config{ - VerifyPeerCertificate: cn.VerifyPeer, - GetCertificate: cn.CertificateHandler, - MaxVersion: uint16(c.MaxVersion), - MinVersion: uint16(c.MinVersion), - } - - ca, err := GetClientAuthFromString(c.ClientAuth) - if err != nil { - return err - } - config.ClientAuth = ca - cn.clientAuth = ca - // Kick off the refresh handler - go cn.startUpdateTimer() - l.windowsCertHandler = cn - l.tlsConfig = config - l.cfg = c - return nil -} - // Run runs the filter refresh. Stop should be called when done. func (c *WinCertStoreHandler) Run() { go c.startUpdateTimer() diff --git a/internal/static/server/tls_test.go b/internal/static/server/tls_test.go deleted file mode 100644 index de9a2402c0..0000000000 --- a/internal/static/server/tls_test.go +++ /dev/null @@ -1,68 +0,0 @@ -package server - -import ( - "crypto/tls" - "fmt" - "io" - "log" - "net" - "net/http" - "net/url" - "testing" - - kitlog "github.com/go-kit/log" - "github.com/stretchr/testify/require" -) - -func Test_tlsListener(t *testing.T) { - rawLis, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - - tlsConfig := TLSConfig{ - TLSCertPath: "testdata/example-cert.pem", - TLSKeyPath: "testdata/example-key.pem", - ClientAuth: "NoClientCert", - } - tlsLis, err := newTLSListener(rawLis, tlsConfig, kitlog.NewNopLogger()) - require.NoError(t, err) - - httpSrv := &http.Server{ - ErrorLog: log.New(io.Discard, "", 0), - } - go func() { - _ = httpSrv.Serve(tlsLis) - }() - defer func() { - httpSrv.Close() - }() - - httpTransport := &http.Transport{ - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, - }, - } - cli := http.Client{Transport: httpTransport} - - resp, err := cli.Get(fmt.Sprintf("https://%s", tlsLis.Addr())) - if err == nil { - resp.Body.Close() - } - require.NoError(t, err) - - // Update our TLSConfig to require a client cert. - tlsConfig.ClientAuth = "RequireAndVerifyClientCert" - require.NoError(t, tlsLis.ApplyConfig(tlsConfig)) - - // Close our idle connections so our next request forces a new dial. - httpTransport.CloseIdleConnections() - - // Create a second connection which should now fail because we don't supply a client certificate. - resp, err = cli.Get(fmt.Sprintf("https://%s", tlsLis.Addr())) - if err == nil { - resp.Body.Close() - } - - var urlError *url.Error - require.ErrorAs(t, err, &urlError) - require.Contains(t, urlError.Err.Error(), "tls:") -} diff --git a/internal/static/supportbundle/supportbundle.go b/internal/static/supportbundle/supportbundle.go deleted file mode 100644 index 3963c2a9cc..0000000000 --- a/internal/static/supportbundle/supportbundle.go +++ /dev/null @@ -1,235 +0,0 @@ -package supportbundle - -import ( - "archive/zip" - "bytes" - "context" - "fmt" - "io" - "net/http" - "path/filepath" - "runtime" - "runtime/pprof" - "strings" - "sync" - "time" - - "github.com/grafana/agent/internal/build" - "github.com/grafana/agent/internal/static/server" - "github.com/mackerelio/go-osstat/uptime" - "gopkg.in/yaml.v3" -) - -// Bundle collects all the data that is exposed as a support bundle.
-type Bundle struct { - meta []byte - config []byte - agentMetrics []byte - agentMetricsInstances []byte - agentMetricsTargets []byte - agentLogsInstances []byte - agentLogsTargets []byte - heapBuf *bytes.Buffer - goroutineBuf *bytes.Buffer - blockBuf *bytes.Buffer - mutexBuf *bytes.Buffer - cpuBuf *bytes.Buffer -} - -// Metadata contains general runtime information about the current Agent. -type Metadata struct { - BuildVersion string `yaml:"build_version"` - OS string `yaml:"os"` - Architecture string `yaml:"architecture"` - Uptime float64 `yaml:"uptime"` - Payload map[string]interface{} `yaml:"payload"` -} - -// Used to enforce single-flight requests to Export -var mut sync.Mutex - -// Export gathers the information required for the support bundle. -func Export(ctx context.Context, enabledFeatures []string, cfg []byte, srvAddress string, dialContext server.DialContextFunc) (*Bundle, error) { - mut.Lock() - defer mut.Unlock() - // The block profiler is disabled by default. Temporarily enable recording - // of all blocking events. Also, temporarily record all mutex contentions, - // and defer restoring of earlier mutex profiling fraction. - runtime.SetBlockProfileRate(1) - old := runtime.SetMutexProfileFraction(1) - defer func() { - runtime.SetBlockProfileRate(0) - runtime.SetMutexProfileFraction(old) - }() - - // Gather runtime metadata. - ut, err := uptime.Get() - if err != nil { - return nil, err - } - m := Metadata{ - BuildVersion: build.Version, - OS: runtime.GOOS, - Architecture: runtime.GOARCH, - Uptime: ut.Seconds(), - Payload: map[string]interface{}{"enabled-features": enabledFeatures}, - } - meta, err := yaml.Marshal(m) - if err != nil { - return nil, fmt.Errorf("failed to marshal support bundle metadata: %s", err) - } - - var httpClient http.Client - httpClient.Transport = &http.Transport{DialContext: dialContext} - // Gather Agent's own metrics. - resp, err := httpClient.Get("http://" + srvAddress + "/metrics") - if err != nil { - return nil, fmt.Errorf("failed to get internal Agent metrics: %s", err) - } - agentMetrics, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read internal Agent metrics: %s", err) - } - - // Collect the Agent metrics instances and target statuses. - resp, err = httpClient.Get("http://" + srvAddress + "/agent/api/v1/metrics/instances") - if err != nil { - return nil, fmt.Errorf("failed to get internal Agent metrics: %s", err) - } - agentMetricsInstances, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read internal Agent metrics: %s", err) - } - resp, err = httpClient.Get("http://" + srvAddress + "/agent/api/v1/metrics/targets") - if err != nil { - return nil, fmt.Errorf("failed to get Agent metrics targets: %s", err) - } - agentMetricsTargets, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read Agent metrics targets: %s", err) - } - - // Collect the Agent's logs instances and target statuses. 
- resp, err = httpClient.Get("http://" + srvAddress + "/agent/api/v1/logs/instances") - if err != nil { - return nil, fmt.Errorf("failed to get Agent logs instances: %s", err) - } - agentLogsInstances, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read Agent logs instances: %s", err) - } - - resp, err = httpClient.Get("http://" + srvAddress + "/agent/api/v1/logs/targets") - if err != nil { - return nil, fmt.Errorf("failed to get Agent logs targets: %s", err) - } - agentLogsTargets, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read Agent logs targets: %s", err) - } - - // Export pprof data. - var ( - cpuBuf bytes.Buffer - heapBuf bytes.Buffer - goroutineBuf bytes.Buffer - blockBuf bytes.Buffer - mutexBuf bytes.Buffer - ) - err = pprof.StartCPUProfile(&cpuBuf) - if err != nil { - return nil, err - } - deadline, _ := ctx.Deadline() - // Sleep for the remaining of the context deadline, but leave some time for - // the rest of the bundle to be exported successfully. - time.Sleep(time.Until(deadline) - 200*time.Millisecond) - pprof.StopCPUProfile() - - p := pprof.Lookup("heap") - if err := p.WriteTo(&heapBuf, 0); err != nil { - return nil, err - } - p = pprof.Lookup("goroutine") - if err := p.WriteTo(&goroutineBuf, 0); err != nil { - return nil, err - } - p = pprof.Lookup("block") - if err := p.WriteTo(&blockBuf, 0); err != nil { - return nil, err - } - p = pprof.Lookup("mutex") - if err := p.WriteTo(&mutexBuf, 0); err != nil { - return nil, err - } - - // Finally, bundle everything up to be served, either as a zip from - // memory, or exported to a directory. - bundle := &Bundle{ - meta: meta, - config: cfg, - agentMetrics: agentMetrics, - agentMetricsInstances: agentMetricsInstances, - agentMetricsTargets: agentMetricsTargets, - agentLogsInstances: agentLogsInstances, - agentLogsTargets: agentLogsTargets, - heapBuf: &heapBuf, - goroutineBuf: &goroutineBuf, - blockBuf: &blockBuf, - mutexBuf: &mutexBuf, - cpuBuf: &cpuBuf, - } - - return bundle, nil -} - -// Serve the collected data and logs as a zip file over the given -// http.ResponseWriter. -func Serve(rw http.ResponseWriter, b *Bundle, logsBuf *bytes.Buffer) error { - zw := zip.NewWriter(rw) - rw.Header().Set("Content-Type", "application/zip") - rw.Header().Set("Content-Disposition", "attachment; filename=\"agent-support-bundle.zip\"") - - zipStructure := map[string][]byte{ - "agent-metadata.yaml": b.meta, - "agent-config.yaml": b.config, - "agent-metrics.txt": b.agentMetrics, - "agent-metrics-instances.json": b.agentMetricsInstances, - "agent-metrics-targets.json": b.agentMetricsTargets, - "agent-logs-instances.json": b.agentLogsInstances, - "agent-logs-targets.json": b.agentLogsTargets, - "agent-logs.txt": logsBuf.Bytes(), - "pprof/cpu.pprof": b.cpuBuf.Bytes(), - "pprof/heap.pprof": b.heapBuf.Bytes(), - "pprof/goroutine.pprof": b.goroutineBuf.Bytes(), - "pprof/mutex.pprof": b.mutexBuf.Bytes(), - "pprof/block.pprof": b.blockBuf.Bytes(), - } - - for fn, b := range zipStructure { - if b != nil { - path := append([]string{"agent-support-bundle"}, strings.Split(fn, "/")...) 
- if err := writeByteSlice(zw, b, path...); err != nil { - return err - } - } - } - - err := zw.Close() - if err != nil { - return fmt.Errorf("failed to flush the zip writer: %v", err) - } - return nil -} - -func writeByteSlice(zw *zip.Writer, b []byte, fn ...string) error { - f, err := zw.Create(filepath.Join(fn...)) - if err != nil { - return err - } - _, err = f.Write(b) - if err != nil { - return err - } - return nil -} diff --git a/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor.go b/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor.go index c347e5cbb5..0624e1a54d 100644 --- a/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor.go +++ b/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor.go @@ -4,25 +4,14 @@ import ( "context" "errors" "fmt" - "strconv" "time" "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/go-logfmt/logfmt" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/agent/internal/static/traces/contextkeys" - "github.com/grafana/agent/internal/util" util_log "github.com/grafana/agent/internal/util/log" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" - "github.com/prometheus/common/model" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/consumer" - "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/ptrace" "go.opentelemetry.io/collector/processor" - semconv "go.opentelemetry.io/collector/semconv/v1.6.1" "go.uber.org/atomic" ) @@ -35,19 +24,14 @@ const ( defaultTraceIDKey = "tid" defaultTimeout = time.Millisecond - - typeSpan = "span" - typeRoot = "root" - typeProcess = "process" ) type automaticLoggingProcessor struct { nextConsumer consumer.Traces - cfg *AutomaticLoggingConfig - logToStdout bool - logsInstance *logs.Instance - done atomic.Bool + cfg *AutomaticLoggingConfig + logToStdout bool + done atomic.Bool labels map[string]struct{} @@ -105,77 +89,7 @@ func newTraceProcessor(nextConsumer consumer.Traces, cfg *AutomaticLoggingConfig } func (p *automaticLoggingProcessor) ConsumeTraces(ctx context.Context, td ptrace.Traces) error { - rsLen := td.ResourceSpans().Len() - for i := 0; i < rsLen; i++ { - rs := td.ResourceSpans().At(i) - ssLen := rs.ScopeSpans().Len() - - var svc string - svcAtt, ok := rs.Resource().Attributes().Get(semconv.AttributeServiceName) - if ok { - svc = svcAtt.Str() - } - - for j := 0; j < ssLen; j++ { - ss := rs.ScopeSpans().At(j) - spanLen := ss.Spans().Len() - - lastTraceID := "" - for k := 0; k < spanLen; k++ { - span := ss.Spans().At(k) - traceID := span.TraceID().String() - - if p.cfg.Spans { - keyValues := append(p.spanKeyVals(span), p.processKeyVals(rs.Resource(), svc)...) - p.exportToLogsInstance(typeSpan, traceID, p.spanLabels(keyValues), keyValues...) - } - - if p.cfg.Roots && span.ParentSpanID().IsEmpty() { - keyValues := append(p.spanKeyVals(span), p.processKeyVals(rs.Resource(), svc)...) - p.exportToLogsInstance(typeRoot, traceID, p.spanLabels(keyValues), keyValues...) - } - - if p.cfg.Processes && lastTraceID != traceID { - lastTraceID = traceID - keyValues := p.processKeyVals(rs.Resource(), svc) - p.exportToLogsInstance(typeProcess, traceID, p.spanLabels(keyValues), keyValues...) 
- }
- }
- }
- }
-
- return p.nextConsumer.ConsumeTraces(ctx, td)
-}
-
-func (p *automaticLoggingProcessor) spanLabels(keyValues []interface{}) model.LabelSet {
- if len(keyValues) == 0 {
- return model.LabelSet{}
- }
- ls := make(map[model.LabelName]model.LabelValue, len(keyValues)/2)
- var (
- k, v string
- ok bool
- )
- for i := 0; i < len(keyValues); i += 2 {
- if k, ok = keyValues[i].(string); !ok {
- // Should never happen, all keys are strings
- level.Error(p.logger).Log("msg", "error casting label key to string", "key", keyValues[i])
- continue
- }
- // Try to cast value to string
- if v, ok = keyValues[i+1].(string); !ok {
- // If it's not a string, format it to its string representation
- v = fmt.Sprintf("%v", keyValues[i+1])
- }
- if _, ok := p.labels[k]; ok {
- // Loki does not accept "." as a valid character for labels
- // Dots . are replaced by underscores _
- k = util.SanitizeLabelName(k)
-
- ls[model.LabelName(k)] = model.LabelValue(v)
- }
- }
- return ls
+ return nil
 }
 
 func (p *automaticLoggingProcessor) Capabilities() consumer.Capabilities {
@@ -184,16 +98,8 @@ func (p *automaticLoggingProcessor) Capabilities() consumer.Capabilities {
 
 // Start is invoked during service startup.
 func (p *automaticLoggingProcessor) Start(ctx context.Context, _ component.Host) error {
- if !p.logToStdout {
- logs, ok := ctx.Value(contextkeys.Logs).(*logs.Logs)
- if !ok {
- return fmt.Errorf("key does not contain a logs instance")
- }
- p.logsInstance = logs.Instance(p.cfg.LogsName)
- if p.logsInstance == nil {
- return fmt.Errorf("logs instance %s not found", p.cfg.LogsName)
- }
- }
+ // NOTE(rfratto): automaticloggingprocessor only exists for config conversions,
+ // so we don't need any logic here.
 return nil
 }
 
@@ -204,109 +110,6 @@ func (p *automaticLoggingProcessor) Shutdown(context.Context) error {
 return nil
 }
 
-func (p *automaticLoggingProcessor) processKeyVals(resource pcommon.Resource, svc string) []interface{} {
- atts := make([]interface{}, 0, 2) // 2 for service name
- rsAtts := resource.Attributes()
-
- // Add an attribute with the service name
- atts = append(atts, p.cfg.Overrides.ServiceKey)
- atts = append(atts, svc)
-
- for _, name := range p.cfg.ProcessAttributes {
- att, ok := rsAtts.Get(name)
- if ok {
- // name/key val pairs
- atts = append(atts, name)
- atts = append(atts, attributeValue(att))
- }
- }
-
- return atts
-}
-
-func (p *automaticLoggingProcessor) spanKeyVals(span ptrace.Span) []interface{} {
- atts := make([]interface{}, 0, 8) // 8 for name, duration, service name and status
-
- atts = append(atts, p.cfg.Overrides.SpanNameKey)
- atts = append(atts, span.Name())
-
- atts = append(atts, p.cfg.Overrides.DurationKey)
- atts = append(atts, spanDuration(span))
-
- // Skip STATUS_CODE_UNSET to be less spammy
- if span.Status().Code() != ptrace.StatusCodeUnset {
- atts = append(atts, p.cfg.Overrides.StatusKey)
- atts = append(atts, span.Status().Code())
- }
-
- for _, name := range p.cfg.SpanAttributes {
- att, ok := span.Attributes().Get(name)
- if ok {
- atts = append(atts, name)
- atts = append(atts, attributeValue(att))
- }
- }
-
- return atts
-}
-
-func (p *automaticLoggingProcessor) exportToLogsInstance(kind string, traceID string, labels model.LabelSet, keyvals ...interface{}) {
- if p.done.Load() {
- return
- }
-
- keyvals = append(keyvals, []interface{}{p.cfg.Overrides.TraceIDKey, traceID}...)
- line, err := logfmt.MarshalKeyvals(keyvals...)
- if err != nil { - level.Warn(p.logger).Log("msg", "unable to marshal keyvals", "err", err) - return - } - - // if we're logging to stdout, log and bail - if p.logToStdout { - level.Info(p.logger).Log(keyvals...) - return - } - - // Add logs instance label - labels[model.LabelName(p.cfg.Overrides.LogsTag)] = model.LabelValue(kind) - - sent := p.logsInstance.SendEntry(api.Entry{ - Labels: labels, - Entry: logproto.Entry{ - Timestamp: time.Now(), - Line: string(line), - }, - }, p.cfg.Timeout) - - if !sent { - level.Warn(p.logger).Log("msg", "failed to autolog to logs pipeline", "kind", kind, "traceid", traceID) - } -} - -func spanDuration(span ptrace.Span) string { - dur := int64(span.EndTimestamp() - span.StartTimestamp()) - return strconv.FormatInt(dur, 10) + "ns" -} - -func attributeValue(att pcommon.Value) interface{} { - switch att.Type() { - case pcommon.ValueTypeStr: - return att.Str() - case pcommon.ValueTypeInt: - return att.Int() - case pcommon.ValueTypeDouble: - return att.Double() - case pcommon.ValueTypeBool: - return att.Bool() - case pcommon.ValueTypeMap: - return att.Map() - case pcommon.ValueTypeSlice: - return att.Slice() - } - return nil -} - func override(cfgValue string, defaultValue string) string { if cfgValue == "" { return defaultValue diff --git a/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor_test.go b/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor_test.go index 310a09c0ad..b02b7ba7f9 100644 --- a/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor_test.go +++ b/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor_test.go @@ -3,174 +3,14 @@ package automaticloggingprocessor import ( "context" "testing" - "time" "github.com/grafana/agent/internal/static/logs" "github.com/grafana/agent/internal/util" - "github.com/prometheus/common/model" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component/componenttest" - "go.opentelemetry.io/collector/pdata/pcommon" - "go.opentelemetry.io/collector/pdata/ptrace" "gopkg.in/yaml.v3" ) -func TestSpanKeyVals(t *testing.T) { - tests := []struct { - spanName string - spanAttrs map[string]interface{} - spanStart time.Time - spanEnd time.Time - cfg AutomaticLoggingConfig - expected []interface{} - }{ - { - expected: []interface{}{ - "span", "", - "dur", "0ns", - "status", ptrace.StatusCode(1), - }, - }, - { - spanName: "test", - expected: []interface{}{ - "span", "test", - "dur", "0ns", - "status", ptrace.StatusCode(1), - }, - }, - { - expected: []interface{}{ - "span", "", - "dur", "0ns", - "status", ptrace.StatusCode(1), - }, - }, - { - spanStart: time.Unix(0, 0), - spanEnd: time.Unix(0, 10), - expected: []interface{}{ - "span", "", - "dur", "10ns", - "status", ptrace.StatusCode(1), - }, - }, - { - spanStart: time.Unix(0, 10), - spanEnd: time.Unix(0, 100), - expected: []interface{}{ - "span", "", - "dur", "90ns", - "status", ptrace.StatusCode(1), - }, - }, - { - spanAttrs: map[string]interface{}{ - "xstr": "test", - }, - expected: []interface{}{ - "span", "", - "dur", "0ns", - "status", ptrace.StatusCode(1), - }, - }, - { - spanAttrs: map[string]interface{}{ - "xstr": "test", - }, - cfg: AutomaticLoggingConfig{ - SpanAttributes: []string{"xstr"}, - }, - expected: []interface{}{ - "span", "", - "dur", "0ns", - "status", ptrace.StatusCode(1), - "xstr", "test", - }, - }, - { - cfg: AutomaticLoggingConfig{ - Overrides: OverrideConfig{ - SpanNameKey: "a", - DurationKey: "c", - 
StatusKey: "d", - }, - }, - expected: []interface{}{ - "a", "", - "c", "0ns", - "d", ptrace.StatusCode(1), - }, - }, - } - - for _, tc := range tests { - tc.cfg.Backend = BackendStdout - tc.cfg.Spans = true - p, err := newTraceProcessor(&automaticLoggingProcessor{}, &tc.cfg) - require.NoError(t, err) - - span := ptrace.NewSpan() - span.SetName(tc.spanName) - span.Attributes().FromRaw(tc.spanAttrs) - span.SetStartTimestamp(pcommon.NewTimestampFromTime(tc.spanStart)) - span.SetEndTimestamp(pcommon.NewTimestampFromTime(tc.spanEnd)) - span.Status().SetCode(ptrace.StatusCodeOk) - - actual := p.(*automaticLoggingProcessor).spanKeyVals(span) - assert.Equal(t, tc.expected, actual) - } -} - -func TestProcessKeyVals(t *testing.T) { - tests := []struct { - processAttrs map[string]interface{} - svc string - cfg AutomaticLoggingConfig - expected []interface{} - }{ - { - expected: []interface{}{ - "svc", "", - }, - }, - { - processAttrs: map[string]interface{}{ - "xstr": "test", - }, - expected: []interface{}{ - "svc", "", - }, - }, - { - processAttrs: map[string]interface{}{ - "xstr": "test", - }, - cfg: AutomaticLoggingConfig{ - ProcessAttributes: []string{"xstr"}, - }, - expected: []interface{}{ - "svc", "", - "xstr", "test", - }, - }, - } - - for _, tc := range tests { - tc.cfg.Backend = BackendStdout - tc.cfg.Spans = true - p, err := newTraceProcessor(&automaticLoggingProcessor{}, &tc.cfg) - require.NoError(t, err) - - process := pcommon.NewResource() - process.Attributes().FromRaw(tc.processAttrs) - - actual := p.(*automaticLoggingProcessor).processKeyVals(process, tc.svc) - assert.Equal(t, tc.expected, actual) - } -} - func TestBadConfigs(t *testing.T) { tests := []struct { cfg *AutomaticLoggingConfig @@ -276,81 +116,3 @@ func TestLokiNameMigration(t *testing.T) { require.NoError(t, err) require.YAMLEq(t, expect, string(bb)) } - -func TestLabels(t *testing.T) { - tests := []struct { - name string - labels []string - keyValues []interface{} - expectedLabels model.LabelSet - }{ - { - name: "happy case", - labels: []string{"loki", "svc"}, - keyValues: []interface{}{"loki", "loki", "svc", "gateway", "duration", "1s"}, - expectedLabels: map[model.LabelName]model.LabelValue{ - "loki": "loki", - "svc": "gateway", - }, - }, - { - name: "happy case with dots", - labels: []string{"loki", "service.name"}, - keyValues: []interface{}{"loki", "loki", "service.name", "gateway", "duration", "1s"}, - expectedLabels: map[model.LabelName]model.LabelValue{ - "loki": "loki", - "service_name": "gateway", - }, - }, - { - name: "no labels", - labels: []string{}, - keyValues: []interface{}{"loki", "loki", "svc", "gateway", "duration", "1s"}, - expectedLabels: map[model.LabelName]model.LabelValue{}, - }, - { - name: "label not present in keyValues", - labels: []string{"loki", "svc"}, - keyValues: []interface{}{"loki", "loki", "duration", "1s"}, - expectedLabels: map[model.LabelName]model.LabelValue{ - "loki": "loki", - }, - }, - { - name: "label value is not type string", - labels: []string{"loki"}, - keyValues: []interface{}{"loki", 42, "duration", "1s"}, - expectedLabels: map[model.LabelName]model.LabelValue{ - "loki": "42", - }, - }, - { - name: "stringifies value if possible", - labels: []string{"status"}, - keyValues: []interface{}{"status", ptrace.StatusCode(1)}, - expectedLabels: map[model.LabelName]model.LabelValue{ - "status": model.LabelValue(ptrace.StatusCode(1).String()), - }, - }, - { - name: "no keyValues", - labels: []string{"status"}, - keyValues: []interface{}{}, - expectedLabels: 
map[model.LabelName]model.LabelValue{}, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - cfg := &AutomaticLoggingConfig{ - Spans: true, - Labels: tc.labels, - } - p, err := newTraceProcessor(&automaticLoggingProcessor{}, cfg) - require.NoError(t, err) - - ls := p.(*automaticLoggingProcessor).spanLabels(tc.keyValues) - assert.Equal(t, tc.expectedLabels, ls) - }) - } -} diff --git a/internal/static/traces/instance.go b/internal/static/traces/instance.go deleted file mode 100644 index 0c2e3fcb19..0000000000 --- a/internal/static/traces/instance.go +++ /dev/null @@ -1,194 +0,0 @@ -package traces - -import ( - "context" - "fmt" - "sync" - "time" - - "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/connector" - otelexporter "go.opentelemetry.io/collector/exporter" - "go.opentelemetry.io/collector/extension" - "go.opentelemetry.io/collector/otelcol" - "go.opentelemetry.io/collector/processor" - "go.opentelemetry.io/collector/receiver" - "go.opentelemetry.io/collector/service" - "go.uber.org/zap" - "go.uber.org/zap/zapcore" - - "github.com/grafana/agent/internal/build" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/traces/automaticloggingprocessor" - "github.com/grafana/agent/internal/static/traces/contextkeys" - "github.com/grafana/agent/internal/static/traces/servicegraphprocessor" - "github.com/grafana/agent/internal/static/traces/traceutils" - "github.com/grafana/agent/internal/util" - prom_client "github.com/prometheus/client_golang/prometheus" - "go.opentelemetry.io/otel/trace/noop" -) - -// Instance wraps the OpenTelemetry collector to enable tracing pipelines -type Instance struct { - mut sync.Mutex - cfg InstanceConfig - logger *zap.Logger - - factories otelcol.Factories - service *service.Service -} - -// NewInstance creates and starts an instance of tracing pipelines. -func NewInstance(logsSubsystem *logs.Logs, reg prom_client.Registerer, cfg InstanceConfig, logger *zap.Logger, promInstanceManager instance.Manager) (*Instance, error) { - instance := &Instance{} - instance.logger = logger - - if err := instance.ApplyConfig(logsSubsystem, promInstanceManager, reg, cfg); err != nil { - return nil, err - } - return instance, nil -} - -// ApplyConfig updates the configuration of the Instance. 
-func (i *Instance) ApplyConfig(logsSubsystem *logs.Logs, promInstanceManager instance.Manager, reg prom_client.Registerer, cfg InstanceConfig) error { - i.mut.Lock() - defer i.mut.Unlock() - - if util.CompareYAML(cfg, i.cfg) { - // No config change - return nil - } - i.cfg = cfg - - // Shut down any existing pipeline - i.stop() - - err := i.buildAndStartPipeline(context.Background(), cfg, logsSubsystem, promInstanceManager, reg) - if err != nil { - return fmt.Errorf("failed to create pipeline: %w", err) - } - - return nil -} - -// Stop stops the OpenTelemetry collector subsystem -func (i *Instance) Stop() { - i.mut.Lock() - defer i.mut.Unlock() - - i.stop() -} - -func (i *Instance) stop() { - shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - if i.service != nil { - err := i.service.Shutdown(shutdownCtx) - if err != nil { - i.logger.Error("failed to stop Otel service", zap.Error(err)) - } - } -} - -func (i *Instance) buildAndStartPipeline(ctx context.Context, cfg InstanceConfig, logs *logs.Logs, instManager instance.Manager, reg prom_client.Registerer) error { - // create component factories - otelConfig, err := cfg.OtelConfig() - if err != nil { - return fmt.Errorf("failed to load otelConfig from agent traces config: %w", err) - } - for _, rw := range cfg.RemoteWrite { - if rw.InsecureSkipVerify { - i.logger.Warn("Configuring TLS with insecure_skip_verify. Use tls_config.insecure_skip_verify instead") - } - if rw.TLSConfig != nil && rw.TLSConfig.ServerName != "" { - i.logger.Warn("Configuring unsupported tls_config.server_name") - } - } - - if cfg.SpanMetrics != nil && len(cfg.SpanMetrics.MetricsInstance) != 0 { - ctx = context.WithValue(ctx, contextkeys.Metrics, instManager) - } - - if cfg.LoadBalancing == nil && (cfg.TailSampling != nil || cfg.ServiceGraphs != nil) { - i.logger.Warn("Configuring tail_sampling and/or service_graphs without load_balancing." + - "Load balancing via trace ID is required for those features to work properly in multi agent deployments") - } - - if cfg.LoadBalancing == nil && cfg.SpanMetrics != nil { - i.logger.Warn("Configuring spanmetrics without load_balancing." 
+ - "Load balancing via service name is required for spanmetrics to work properly in multi agent deployments") - } - - if cfg.AutomaticLogging != nil && cfg.AutomaticLogging.Backend != automaticloggingprocessor.BackendStdout { - ctx = context.WithValue(ctx, contextkeys.Logs, logs) - } - - factories, err := tracingFactories() - if err != nil { - return fmt.Errorf("failed to load tracing factories: %w", err) - } - i.factories = factories - - appinfo := component.BuildInfo{ - Command: "agent", - Description: "agent", - Version: build.Version, - } - - err = util.SetupStaticModeOtelFeatureGates() - if err != nil { - return err - } - - promExporter, err := traceutils.PrometheusExporter(reg) - if err != nil { - return fmt.Errorf("error creating otel prometheus exporter: %w", err) - } - - i.service, err = service.New(ctx, service.Settings{ - BuildInfo: appinfo, - Receivers: receiver.NewBuilder(otelConfig.Receivers, i.factories.Receivers), - Processors: processor.NewBuilder(otelConfig.Processors, i.factories.Processors), - Exporters: otelexporter.NewBuilder(otelConfig.Exporters, i.factories.Exporters), - Connectors: connector.NewBuilder(otelConfig.Connectors, i.factories.Connectors), - Extensions: extension.NewBuilder(otelConfig.Extensions, i.factories.Extensions), - OtelMetricViews: servicegraphprocessor.OtelMetricViews(), - OtelMetricReader: promExporter, - DisableProcessMetrics: true, - UseExternalMetricsServer: true, - TracerProvider: noop.NewTracerProvider(), - //TODO: Plug in an AsyncErrorChannel to shut down the Agent in case of a fatal event - LoggingOptions: []zap.Option{ - zap.WrapCore(func(zapcore.Core) zapcore.Core { - return i.logger.Core() - }), - }, - }, otelConfig.Service) - if err != nil { - return fmt.Errorf("failed to create Otel service: %w", err) - } - - err = i.service.Start(ctx) - if err != nil { - return fmt.Errorf("failed to start Otel service: %w", err) - } - - return err -} - -// ReportFatalError implements component.Host -func (i *Instance) ReportFatalError(err error) { - i.logger.Error("fatal error reported", zap.Error(err)) -} - -// GetFactory implements component.Host -func (i *Instance) GetFactory(kind component.Kind, componentType component.Type) component.Factory { - switch kind { - case component.KindReceiver: - return i.factories.Receivers[componentType] - default: - return nil - } -} diff --git a/internal/static/traces/remotewriteexporter/exporter.go b/internal/static/traces/remotewriteexporter/exporter.go index 5f99af577a..cec6e77d56 100644 --- a/internal/static/traces/remotewriteexporter/exporter.go +++ b/internal/static/traces/remotewriteexporter/exporter.go @@ -2,115 +2,28 @@ package remotewriteexporter import ( "context" - "fmt" - "strconv" - "strings" - "sync" - "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/traces/contextkeys" - util "github.com/grafana/agent/internal/util/log" - "github.com/prometheus/prometheus/model/labels" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/exporter" - "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" ) -const ( - nameLabelKey = "__name__" - sumSuffix = "sum" - countSuffix = "count" - bucketSuffix = "bucket" - leStr = "le" - infBucket = "+Inf" - noSuffix = "" -) - -type datapoint struct { - ts int64 - v float64 - l labels.Labels -} - -type remoteWriteExporter struct { - mtx sync.Mutex - - close 
chan struct{} - closed chan struct{} - - manager instance.Manager - promInstance string - - constLabels labels.Labels - namespace string - - seriesMap map[uint64]*datapoint - staleTime int64 - lastFlush int64 - loopInterval time.Duration - - logger log.Logger -} +type remoteWriteExporter struct{} func newRemoteWriteExporter(cfg *Config) (exporter.Metrics, error) { - logger := log.With(util.Logger, "component", "traces remote write exporter") - - ls := make(labels.Labels, 0, len(cfg.ConstLabels)) - - for name, value := range cfg.ConstLabels { - ls = append(ls, labels.Label{Name: name, Value: value}) - } - - staleTime := (15 * time.Minute).Milliseconds() - if cfg.StaleTime > 0 { - staleTime = cfg.StaleTime.Milliseconds() - } - - loopInterval := time.Second - if cfg.LoopInterval > 0 { - loopInterval = cfg.LoopInterval - } - - return &remoteWriteExporter{ - mtx: sync.Mutex{}, - close: make(chan struct{}), - closed: make(chan struct{}), - constLabels: ls, - namespace: cfg.Namespace, - promInstance: cfg.PromInstance, - seriesMap: make(map[uint64]*datapoint), - staleTime: staleTime, - loopInterval: loopInterval, - logger: logger, - }, nil + // NOTE(rfratto): remotewriteexporter has been kept for config conversions, + // but is never used, so the implementation of the component has been + // removed. + return &remoteWriteExporter{}, nil } func (e *remoteWriteExporter) Start(ctx context.Context, _ component.Host) error { - manager, ok := ctx.Value(contextkeys.Metrics).(instance.Manager) - if !ok || manager == nil { - return fmt.Errorf("key does not contain a InstanceManager instance") - } - e.manager = manager - - go e.appenderLoop() - return nil } func (e *remoteWriteExporter) Shutdown(ctx context.Context) error { - close(e.close) - - select { - case <-e.closed: - return nil - case <-ctx.Done(): - return ctx.Err() - } + return nil } func (e *remoteWriteExporter) Capabilities() consumer.Capabilities { @@ -118,202 +31,5 @@ func (e *remoteWriteExporter) Capabilities() consumer.Capabilities { } func (e *remoteWriteExporter) ConsumeMetrics(ctx context.Context, md pmetric.Metrics) error { - select { - case <-e.closed: - return nil - default: - } - - resourceMetrics := md.ResourceMetrics() - for i := 0; i < resourceMetrics.Len(); i++ { - resourceMetric := resourceMetrics.At(i) - scopeMetricsSlice := resourceMetric.ScopeMetrics() - for j := 0; j < scopeMetricsSlice.Len(); j++ { - metricSlice := scopeMetricsSlice.At(j).Metrics() - for k := 0; k < metricSlice.Len(); k++ { - switch metric := metricSlice.At(k); metric.Type() { - case pmetric.MetricTypeGauge: - dataPoints := metric.Sum().DataPoints() - if err := e.handleNumberDataPoints(metric.Name(), dataPoints); err != nil { - return err - } - case pmetric.MetricTypeSum: - if metric.Sum().AggregationTemporality() != pmetric.AggregationTemporalityCumulative { - continue // Only cumulative metrics are supported - } - dataPoints := metric.Sum().DataPoints() - if err := e.handleNumberDataPoints(metric.Name(), dataPoints); err != nil { - return err - } - case pmetric.MetricTypeHistogram: - if metric.Histogram().AggregationTemporality() != pmetric.AggregationTemporalityCumulative { - continue // Only cumulative metrics are supported - } - dataPoints := metric.Histogram().DataPoints() - e.handleHistogramDataPoints(metric.Name(), dataPoints) - case pmetric.MetricTypeSummary: - return fmt.Errorf("unsupported metric data type %s", metric.Type()) - default: - return fmt.Errorf("unsupported metric data type %s", metric.Type()) - } - } - } - } - - return nil -} - -func 
(e *remoteWriteExporter) handleNumberDataPoints(name string, dataPoints pmetric.NumberDataPointSlice) error { - for ix := 0; ix < dataPoints.Len(); ix++ { - dataPoint := dataPoints.At(ix) - lbls := e.createLabelSet(name, noSuffix, dataPoint.Attributes(), labels.Labels{}) - if err := e.appendNumberDataPoint(dataPoint, lbls); err != nil { - return fmt.Errorf("failed to process datapoints %s", err) - } - } - return nil -} - -func (e *remoteWriteExporter) appendNumberDataPoint(dataPoint pmetric.NumberDataPoint, labels labels.Labels) error { - var val float64 - switch dataPoint.ValueType() { - case pmetric.NumberDataPointValueTypeDouble: - val = dataPoint.DoubleValue() - case pmetric.NumberDataPointValueTypeInt: - val = float64(dataPoint.IntValue()) - default: - return fmt.Errorf("unknown data point type: %s", dataPoint.ValueType()) - } - ts := e.timestamp() - - e.appendDatapointForSeries(labels, ts, val) - return nil } - -func (e *remoteWriteExporter) handleHistogramDataPoints(name string, dataPoints pmetric.HistogramDataPointSlice) { - for ix := 0; ix < dataPoints.Len(); ix++ { - dataPoint := dataPoints.At(ix) - ts := e.timestamp() - - // Append sum value - sumLabels := e.createLabelSet(name, sumSuffix, dataPoint.Attributes(), labels.Labels{}) - e.appendDatapointForSeries(sumLabels, ts, dataPoint.Sum()) - - // Append count value - countLabels := e.createLabelSet(name, countSuffix, dataPoint.Attributes(), labels.Labels{}) - e.appendDatapointForSeries(countLabels, ts, float64(dataPoint.Count())) - - var cumulativeCount uint64 - for ix := 0; ix < dataPoint.ExplicitBounds().Len(); ix++ { - eb := dataPoint.ExplicitBounds().At(ix) - - if ix >= dataPoint.BucketCounts().Len() { - break - } - cumulativeCount += dataPoint.BucketCounts().At(ix) - boundStr := strconv.FormatFloat(eb, 'f', -1, 64) - bucketLabels := e.createLabelSet(name, bucketSuffix, dataPoint.Attributes(), labels.Labels{{Name: leStr, Value: boundStr}}) - e.appendDatapointForSeries(bucketLabels, ts, float64(cumulativeCount)) - } - - // add le=+Inf bucket - cumulativeCount += dataPoint.BucketCounts().At(dataPoint.BucketCounts().Len() - 1) - infBucketLabels := e.createLabelSet(name, bucketSuffix, dataPoint.Attributes(), labels.Labels{{Name: leStr, Value: infBucket}}) - e.appendDatapointForSeries(infBucketLabels, ts, float64(cumulativeCount)) - } -} - -func (e *remoteWriteExporter) appendDatapointForSeries(l labels.Labels, ts int64, v float64) { - e.mtx.Lock() - defer e.mtx.Unlock() - - series := l.Hash() - if lastDatapoint, ok := e.seriesMap[series]; ok { - if lastDatapoint.ts >= ts { - return - } - lastDatapoint.ts = ts - lastDatapoint.v = v - return - } - - e.seriesMap[series] = &datapoint{l: l, ts: ts, v: v} -} - -func (e *remoteWriteExporter) appenderLoop() { - t := time.NewTicker(e.loopInterval) - - for { - select { - case <-t.C: - e.mtx.Lock() - inst, err := e.manager.GetInstance(e.promInstance) - if err != nil { - level.Error(e.logger).Log("msg", "failed to get prom instance", "err", err) - continue - } - appender := inst.Appender(context.Background()) - - now := time.Now().UnixMilli() - for _, dp := range e.seriesMap { - // If the datapoint hasn't been updated since the last loop, don't append it - if dp.ts < e.lastFlush { - // If the datapoint is older than now - staleTime, it is stale and gets removed. 
- if now-dp.ts > e.staleTime { - delete(e.seriesMap, dp.l.Hash()) - } - continue - } - - if _, err := appender.Append(0, dp.l, dp.ts, dp.v); err != nil { - level.Error(e.logger).Log("msg", "failed to append datapoint", "err", err) - } - } - - if err := appender.Commit(); err != nil { - level.Error(e.logger).Log("msg", "failed to commit appender", "err", err) - } - - e.lastFlush = now - e.mtx.Unlock() - - case <-e.close: - close(e.closed) - return - } - } -} - -func (e *remoteWriteExporter) createLabelSet(name, suffix string, labelMap pcommon.Map, customLabels labels.Labels) labels.Labels { - ls := make(labels.Labels, 0, labelMap.Len()+1+len(e.constLabels)+len(customLabels)) - // Labels from spanmetrics processor - labelMap.Range(func(k string, v pcommon.Value) bool { - ls = append(ls, labels.Label{ - Name: strings.Replace(k, ".", "_", -1), - Value: v.Str(), - }) - return true - }) - // Metric name label - ls = append(ls, labels.Label{ - Name: nameLabelKey, - Value: metricName(e.namespace, name, suffix), - }) - // Const labels - ls = append(ls, e.constLabels...) - // Custom labels - ls = append(ls, customLabels...) - return ls -} - -func (e *remoteWriteExporter) timestamp() int64 { - return time.Now().UnixMilli() -} - -func metricName(namespace, metric, suffix string) string { - if len(suffix) != 0 { - return fmt.Sprintf("%s_%s_%s", namespace, metric, suffix) - } - return fmt.Sprintf("%s_%s", namespace, metric) -} diff --git a/internal/static/traces/remotewriteexporter/exporter_test.go b/internal/static/traces/remotewriteexporter/exporter_test.go deleted file mode 100644 index 63025ec120..0000000000 --- a/internal/static/traces/remotewriteexporter/exporter_test.go +++ /dev/null @@ -1,183 +0,0 @@ -package remotewriteexporter - -import ( - "context" - "testing" - "time" - - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/traces/contextkeys" - "github.com/prometheus/prometheus/model/exemplar" - "github.com/prometheus/prometheus/model/histogram" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/metadata" - "github.com/prometheus/prometheus/storage" - "github.com/stretchr/testify/require" - "go.opentelemetry.io/collector/pdata/pcommon" - "go.opentelemetry.io/collector/pdata/pmetric" -) - -const ( - callsMetric = "traces_spanmetrics_calls_total" - sumMetric = "traces_spanmetrics_latency_sum" - countMetric = "traces_spanmetrics_latency_count" - bucketMetric = "traces_spanmetrics_latency_bucket" -) - -func TestRemoteWriteExporter_ConsumeMetrics(t *testing.T) { - var ( - countValue uint64 = 20 - sumValue float64 = 100 - bucketCounts = []uint64{1, 2, 3, 4, 5, 6} - explicitBounds = []float64{1, 2.5, 5, 7.5, 10} - ts = time.Date(2020, 1, 2, 3, 4, 5, 6, time.UTC) - ) - - cfg := Config{ - ConstLabels: nil, - Namespace: "traces", - PromInstance: "traces", - } - exp, err := newRemoteWriteExporter(&cfg) - require.NoError(t, err) - - manager := &mockManager{} - ctx := context.WithValue(context.Background(), contextkeys.Metrics, manager) - require.NoError(t, exp.Start(ctx, nil)) - - metrics := pmetric.NewMetrics() - ilm := metrics.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty() - ilm.Scope().SetName("spanmetrics") - - // Append sum metric - sm := ilm.Metrics().AppendEmpty() - sm.SetEmptySum() - sm.SetName("spanmetrics_calls_total") - sm.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) - - sdp := sm.Sum().DataPoints().AppendEmpty() - 
sdp.SetTimestamp(pcommon.NewTimestampFromTime(ts.UTC())) - sdp.SetDoubleValue(sumValue) - - // Append histogram - hm := ilm.Metrics().AppendEmpty() - hm.SetEmptyHistogram() - hm.SetName("spanmetrics_latency") - hm.Histogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) - - hdp := hm.Histogram().DataPoints().AppendEmpty() - hdp.SetTimestamp(pcommon.NewTimestampFromTime(ts.UTC())) - hdp.BucketCounts().FromRaw(bucketCounts) - hdp.ExplicitBounds().FromRaw(explicitBounds) - hdp.SetCount(countValue) - hdp.SetSum(sumValue) - - err = exp.ConsumeMetrics(context.TODO(), metrics) - require.NoError(t, err) - - time.Sleep(5 * time.Second) - - require.NoError(t, exp.Shutdown(context.TODO())) - - // Verify calls - calls := manager.instance.GetAppended(callsMetric) - require.Equal(t, len(calls), 1) - require.Equal(t, calls[0].v, sumValue) - require.Equal(t, calls[0].l, labels.Labels{{Name: nameLabelKey, Value: "traces_spanmetrics_calls_total"}}) - - // Verify _sum - sum := manager.instance.GetAppended(sumMetric) - require.Equal(t, len(sum), 1) - require.Equal(t, sum[0].v, sumValue) - require.Equal(t, sum[0].l, labels.Labels{{Name: nameLabelKey, Value: "traces_spanmetrics_latency_" + sumSuffix}}) - - // Check _count - count := manager.instance.GetAppended(countMetric) - require.Equal(t, len(count), 1) - require.Equal(t, count[0].v, float64(countValue)) - require.Equal(t, count[0].l, labels.Labels{{Name: nameLabelKey, Value: "traces_spanmetrics_latency_" + countSuffix}}) - - // Check _bucket - buckets := manager.instance.GetAppended(bucketMetric) - require.Equal(t, len(buckets), len(bucketCounts)) -} - -type mockManager struct { - instance *mockInstance -} - -func (m *mockManager) GetInstance(string) (instance.ManagedInstance, error) { - if m.instance == nil { - m.instance = &mockInstance{} - } - return m.instance, nil -} - -func (m *mockManager) ListInstances() map[string]instance.ManagedInstance { return nil } - -func (m *mockManager) ListConfigs() map[string]instance.Config { return nil } - -func (m *mockManager) ApplyConfig(_ instance.Config) error { return nil } - -func (m *mockManager) DeleteConfig(_ string) error { return nil } - -func (m *mockManager) Stop() {} - -type mockInstance struct { - instance.NoOpInstance - appender *mockAppender -} - -func (m *mockInstance) Appender(_ context.Context) storage.Appender { - if m.appender == nil { - m.appender = &mockAppender{} - } - return m.appender -} - -func (m *mockInstance) GetAppended(n string) []metric { - return m.appender.GetAppended(n) -} - -type metric struct { - l labels.Labels - t int64 - v float64 -} - -type mockAppender struct { - appendedMetrics []metric -} - -var _ storage.Appender = (*mockAppender)(nil) - -func (a *mockAppender) GetAppended(n string) []metric { - var ms []metric - for _, m := range a.appendedMetrics { - if n == m.l.Get(nameLabelKey) { - ms = append(ms, m) - } - } - return ms -} - -func (a *mockAppender) Append(_ storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - a.appendedMetrics = append(a.appendedMetrics, metric{l: l, t: t, v: v}) - return 0, nil -} - -func (a *mockAppender) Commit() error { return nil } - -func (a *mockAppender) Rollback() error { return nil } - -func (a *mockAppender) AppendExemplar(_ storage.SeriesRef, _ labels.Labels, _ exemplar.Exemplar) (storage.SeriesRef, error) { - return 0, nil -} - -func (a *mockAppender) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) { - return 0, nil -} - -func (a 
*mockAppender) AppendHistogram(_ storage.SeriesRef, _ labels.Labels, _ int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) { - return 0, nil -} diff --git a/internal/static/traces/traces.go b/internal/static/traces/traces.go deleted file mode 100644 index 3226e8084d..0000000000 --- a/internal/static/traces/traces.go +++ /dev/null @@ -1,111 +0,0 @@ -package traces - -import ( - "fmt" - "sync" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util/zapadapter" - prom_client "github.com/prometheus/client_golang/prometheus" - "go.uber.org/zap" -) - -// Traces wraps the OpenTelemetry collector to enable tracing pipelines -type Traces struct { - mut sync.Mutex - instances map[string]*Instance - - logger *zap.Logger - reg prom_client.Registerer - - promInstanceManager instance.Manager -} - -// New creates and starts trace collection. -func New(logsSubsystem *logs.Logs, promInstanceManager instance.Manager, reg prom_client.Registerer, cfg Config, l log.Logger) (*Traces, error) { - traces := &Traces{ - instances: make(map[string]*Instance), - logger: newLogger(l), - reg: reg, - promInstanceManager: promInstanceManager, - } - if err := traces.ApplyConfig(logsSubsystem, promInstanceManager, cfg); err != nil { - return nil, err - } - return traces, nil -} - -// Instance is used to retrieve a named Traces instance -func (t *Traces) Instance(name string) *Instance { - t.mut.Lock() - defer t.mut.Unlock() - - return t.instances[name] -} - -// ApplyConfig updates Traces with a new Config. -func (t *Traces) ApplyConfig(logsSubsystem *logs.Logs, promInstanceManager instance.Manager, cfg Config) error { - t.mut.Lock() - defer t.mut.Unlock() - - newInstances := make(map[string]*Instance, len(cfg.Configs)) - - for _, c := range cfg.Configs { - var ( - instReg = prom_client.WrapRegistererWith(prom_client.Labels{"traces_config": c.Name}, t.reg) - ) - - // If an old instance exists, update it and move it to the new map. - if old, ok := t.instances[c.Name]; ok { - err := old.ApplyConfig(logsSubsystem, promInstanceManager, instReg, c) - if err != nil { - return err - } - - newInstances[c.Name] = old - continue - } - - var ( - instLogger = t.logger.With(zap.String("traces_config", c.Name)) - ) - - inst, err := NewInstance(logsSubsystem, instReg, c, instLogger, t.promInstanceManager) - if err != nil { - return fmt.Errorf("failed to create tracing instance %s: %w", c.Name, err) - } - newInstances[c.Name] = inst - } - - // Any instance in l.instances that isn't in newInstances has been removed - // from the config. Stop them before replacing the map. 
- for key, i := range t.instances { - if _, exist := newInstances[key]; exist { - continue - } - i.Stop() - } - t.instances = newInstances - - return nil -} - -// Stop stops the OpenTelemetry collector subsystem -func (t *Traces) Stop() { - t.mut.Lock() - defer t.mut.Unlock() - - for _, i := range t.instances { - i.Stop() - } -} - -func newLogger(l log.Logger) *zap.Logger { - logger := zapadapter.New(l) - logger = logger.With(zap.String("component", "traces")) - logger.Info("Traces Logger Initialized") - - return logger -} diff --git a/internal/static/traces/traces_test.go b/internal/static/traces/traces_test.go deleted file mode 100644 index 5fc3fa4d88..0000000000 --- a/internal/static/traces/traces_test.go +++ /dev/null @@ -1,193 +0,0 @@ -package traces - -import ( - "fmt" - "strings" - "testing" - "time" - - "github.com/grafana/agent/internal/static/server" - "github.com/grafana/agent/internal/static/traces/traceutils" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/log" - "github.com/opentracing/opentracing-go" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" - jaegercfg "github.com/uber/jaeger-client-go/config" - "go.opentelemetry.io/collector/pdata/ptrace" - "gopkg.in/yaml.v2" -) - -func TestTraces(t *testing.T) { - tracesCh := make(chan ptrace.Traces) - tracesAddr := traceutils.NewTestServer(t, func(t ptrace.Traces) { - tracesCh <- t - }) - - tracesCfgText := util.Untab(fmt.Sprintf(` -configs: -- name: default - receivers: - jaeger: - protocols: - thrift_compact: - remote_write: - - endpoint: %s - insecure: true - batch: - timeout: 100ms - send_batch_size: 1 - `, tracesAddr)) - - var cfg Config - dec := yaml.NewDecoder(strings.NewReader(tracesCfgText)) - dec.SetStrict(true) - err := dec.Decode(&cfg) - require.NoError(t, err) - - var loggingLevel log.Level - require.NoError(t, loggingLevel.Set("debug")) - - traces, err := New(nil, nil, prometheus.NewRegistry(), cfg, &server.HookLogger{}) - require.NoError(t, err) - t.Cleanup(traces.Stop) - - tr := testJaegerTracer(t) - span := tr.StartSpan("test-span") - span.Finish() - - select { - case <-time.After(30 * time.Second): - require.Fail(t, "failed to receive a span after 30 seconds") - case tr := <-tracesCh: - require.Equal(t, 1, tr.SpanCount()) - // Nothing to do, send succeeded. 
- } -} - -func TestTraceWithSpanmetricsConfig(t *testing.T) { - tracesCfgText := util.Untab(` -configs: -- name: test - receivers: - zipkin: - endpoint: 0.0.0.0:9999 - remote_write: - - endpoint: 0.0.0.0:5555 - insecure: false - tls_config: - insecure_skip_verify: true - spanmetrics: - handler_endpoint: 0.0.0.0:9090 - const_labels: - key1: "value1" - key2: "value2" - `) - - var cfg Config - dec := yaml.NewDecoder(strings.NewReader(tracesCfgText)) - dec.SetStrict(true) - err := dec.Decode(&cfg) - require.NoError(t, err) - - var loggingLevel log.Level - require.NoError(t, loggingLevel.Set("debug")) - - traces, err := New(nil, nil, prometheus.NewRegistry(), cfg, &server.HookLogger{}) - require.NoError(t, err) - t.Cleanup(traces.Stop) -} - -func TestTrace_ApplyConfig(t *testing.T) { - tracesCh := make(chan ptrace.Traces) - tracesAddr := traceutils.NewTestServer(t, func(t ptrace.Traces) { - tracesCh <- t - }) - - tracesCfgText := util.Untab(` -configs: -- name: default - receivers: - jaeger: - protocols: - thrift_compact: - remote_write: - - endpoint: 127.0.0.1:80 # deliberately the wrong endpoint - insecure: true - batch: - timeout: 100ms - send_batch_size: 1 - service_graphs: - enabled: true -`) - - var cfg Config - dec := yaml.NewDecoder(strings.NewReader(tracesCfgText)) - dec.SetStrict(true) - err := dec.Decode(&cfg) - require.NoError(t, err) - - traces, err := New(nil, nil, prometheus.NewRegistry(), cfg, &server.HookLogger{}) - require.NoError(t, err) - t.Cleanup(traces.Stop) - - // Fix the config and apply it before sending spans. - tracesCfgText = util.Untab(fmt.Sprintf(` -configs: -- name: default - receivers: - jaeger: - protocols: - thrift_compact: - remote_write: - - endpoint: %s - insecure: true - batch: - timeout: 100ms - send_batch_size: 1 - `, tracesAddr)) - - var fixedConfig Config - dec = yaml.NewDecoder(strings.NewReader(tracesCfgText)) - dec.SetStrict(true) - err = dec.Decode(&fixedConfig) - require.NoError(t, err) - - err = traces.ApplyConfig(nil, nil, fixedConfig) - require.NoError(t, err) - - tr := testJaegerTracer(t) - span := tr.StartSpan("test-span") - span.Finish() - - select { - case <-time.After(30 * time.Second): - require.Fail(t, "failed to receive a span after 30 seconds") - case tr := <-tracesCh: - require.Equal(t, 1, tr.SpanCount()) - // Nothing to do, send succeeded. 
- } -} - -func testJaegerTracer(t *testing.T) opentracing.Tracer { - t.Helper() - - jaegerConfig := jaegercfg.Configuration{ - ServiceName: "TestTraces", - Sampler: &jaegercfg.SamplerConfig{ - Type: "const", - Param: 1, - }, - Reporter: &jaegercfg.ReporterConfig{ - LocalAgentHostPort: "127.0.0.1:6831", - LogSpans: true, - }, - } - tr, closer, err := jaegerConfig.NewTracer() - require.NoError(t, err) - t.Cleanup(func() { - require.NoError(t, closer.Close()) - }) - - return tr -} diff --git a/internal/util/log/log.go b/internal/util/log/log.go index 9983946e61..8cd0948d57 100644 --- a/internal/util/log/log.go +++ b/internal/util/log/log.go @@ -7,123 +7,9 @@ package log import ( - "fmt" - "os" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - dskit "github.com/grafana/dskit/log" - "github.com/grafana/dskit/server" - "github.com/prometheus/client_golang/prometheus" ) var ( Logger = log.NewNopLogger() - - logMessages = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "log_messages_total", - Help: "Total number of log messages.", - }, []string{"level"}) - - supportedLevels = []level.Value{ - level.DebugValue(), - level.InfoValue(), - level.WarnValue(), - level.ErrorValue(), - } ) - -func init() { - prometheus.MustRegister(logMessages) -} - -// InitLogger initialises the global gokit logger (util_log.Logger) and overrides the -// default logger for the server. -func InitLogger(cfg *server.Config) { - l, err := NewPrometheusLogger(cfg.LogLevel, cfg.LogFormat) - if err != nil { - panic(err) - } - - // when use util_log.Logger, skip 3 stack frames. - Logger = log.With(l, "caller", log.Caller(3)) - - // cfg.Log wraps log function, skip 4 stack frames to get caller information. - // this works in go 1.12, but doesn't work in versions earlier. - // it will always shows the wrapper function generated by compiler - // marked in old versions. - cfg.Log = log.With(l, "caller", log.Caller(4)) -} - -// PrometheusLogger exposes Prometheus counters for each of go-kit's log levels. -type PrometheusLogger struct { - logger log.Logger -} - -// NewPrometheusLogger creates a new instance of PrometheusLogger which exposes -// Prometheus counters for various log levels. -func NewPrometheusLogger(l dskit.Level, format string) (log.Logger, error) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - if format == "json" { - logger = log.NewJSONLogger(log.NewSyncWriter(os.Stderr)) - } - logger = level.NewFilter(logger, LevelFilter(l.String())) - - // Initialise counters for all supported levels: - for _, level := range supportedLevels { - logMessages.WithLabelValues(level.String()) - } - - logger = &PrometheusLogger{ - logger: logger, - } - - // return a Logger without caller information, shouldn't use directly - logger = log.With(logger, "ts", log.DefaultTimestampUTC) - return logger, nil -} - -// Log increments the appropriate Prometheus counter depending on the log level. -func (pl *PrometheusLogger) Log(kv ...interface{}) error { - pl.logger.Log(kv...) 
- l := "unknown" - for i := 1; i < len(kv); i += 2 { - if v, ok := kv[i].(level.Value); ok { - l = v.String() - break - } - } - logMessages.WithLabelValues(l).Inc() - return nil -} - -// CheckFatal prints an error and exits with error code 1 if err is non-nil -func CheckFatal(location string, err error) { - if err != nil { - logger := level.Error(Logger) - if location != "" { - logger = log.With(logger, "msg", "error "+location) - } - // %+v gets the stack trace from errors using github.com/pkg/errors - logger.Log("err", fmt.Sprintf("%+v", err)) - os.Exit(1) - } -} - -// TODO(dannyk): remove once weaveworks/common updates to go-kit/log -// -// -> we can then revert to using Level.Gokit -func LevelFilter(l string) level.Option { - switch l { - case "debug": - return level.AllowDebug() - case "info": - return level.AllowInfo() - case "warn": - return level.AllowWarn() - case "error": - return level.AllowError() - default: - return level.AllowAll() - } -} diff --git a/internal/util/otel_feature_gate.go b/internal/util/otel_feature_gate.go index d2f4797668..643f1e4773 100644 --- a/internal/util/otel_feature_gate.go +++ b/internal/util/otel_feature_gate.go @@ -7,43 +7,7 @@ import ( _ "go.opentelemetry.io/collector/obsreport" ) -// Enables a set of feature gates in Otel's Global Feature Gate Registry. -func EnableOtelFeatureGates(fgNames ...string) error { - fgReg := featuregate.GlobalRegistry() - - for _, fg := range fgNames { - err := fgReg.Set(fg, true) - if err != nil { - return fmt.Errorf("error setting Otel feature gate: %w", err) - } - } - - return nil -} - var ( - // useOtelForInternalMetrics is required so that the Collector service configures Collector components using the Otel SDK - // instead of OpenCensus. If this is not specified, then the OtelMetricViews and OtelMetricReader parameters which we - // pass to service.New() below will not be taken into account. This would mean that metrics from custom components such as - // the one in pkg/traces/servicegraphprocessor would not work. - // - // disableHighCardinalityMetrics is required so that we don't include labels containing ports and IP addresses in gRPC metrics. - // Example metric with high cardinality... - // rpc_server_duration_bucket{net_sock_peer_addr="127.0.0.1",net_sock_peer_port="59947",rpc_grpc_status_code="0",rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",traces_config="default",le="7500"} 294 - // ... the same metric when disableHighCardinalityMetrics is switched on looks like this: - // rpc_server_duration_bucket{rpc_grpc_status_code="0",rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",traces_config="default",le="7500"} 32 - // For more context: - // https://opentelemetry.io/docs/specs/otel/metrics/semantic_conventions/rpc-metrics/ - // https://github.com/open-telemetry/opentelemetry-go-contrib/pull/2700 - // https://github.com/open-telemetry/opentelemetry-collector/pull/6788/files - // - // TODO: Remove "telemetry.useOtelForInternalMetrics" when Collector components - // use OpenTelemetry metrics by default. - staticModeOtelFeatureGates = []string{ - "telemetry.useOtelForInternalMetrics", - "telemetry.disableHighCardinalityMetrics", - } - // Enable the "telemetry.useOtelForInternalMetrics" Collector feature gate. // Currently, Collector components uses OpenCensus metrics by default. 
// Those metrics cannot be integrated with Agent Flow, @@ -56,12 +20,21 @@ var ( } ) -// Enables a set of feature gates which should always be enabled for Static mode. -func SetupStaticModeOtelFeatureGates() error { - return EnableOtelFeatureGates(staticModeOtelFeatureGates...) -} - // Enables a set of feature gates which should always be enabled for Flow mode. func SetupFlowModeOtelFeatureGates() error { return EnableOtelFeatureGates(flowModeOtelFeatureGates...) } + +// Enables a set of feature gates in Otel's Global Feature Gate Registry. +func EnableOtelFeatureGates(fgNames ...string) error { + fgReg := featuregate.GlobalRegistry() + + for _, fg := range fgNames { + err := fgReg.Set(fg, true) + if err != nil { + return fmt.Errorf("error setting Otel feature gate: %w", err) + } + } + + return nil +} diff --git a/internal/util/otel_feature_gate_test.go b/internal/util/otel_feature_gate_test.go index d4b49ea92c..e3809de8cb 100644 --- a/internal/util/otel_feature_gate_test.go +++ b/internal/util/otel_feature_gate_test.go @@ -15,9 +15,6 @@ func Test_FeatureGates(t *testing.T) { fgSet := make(map[string]struct{}) - for _, fg := range staticModeOtelFeatureGates { - fgSet[fg] = struct{}{} - } for _, fg := range flowModeOtelFeatureGates { fgSet[fg] = struct{}{} } @@ -34,7 +31,6 @@ func Test_FeatureGates(t *testing.T) { require.Falsef(t, g.IsEnabled(), "feature gate %s is enabled - should it be removed from the Agent?", g.ID()) }) - require.NoError(t, SetupStaticModeOtelFeatureGates()) require.NoError(t, SetupFlowModeOtelFeatureGates()) reg.VisitAll(func(g *featuregate.Gate) { diff --git a/internal/util/sanitize.go b/internal/util/sanitize.go deleted file mode 100644 index f47595b3aa..0000000000 --- a/internal/util/sanitize.go +++ /dev/null @@ -1,10 +0,0 @@ -package util - -import "regexp" - -var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) - -// SanitizeLabelName sanitizes a label name for Prometheus. -func SanitizeLabelName(name string) string { - return invalidLabelCharRE.ReplaceAllString(name, "_") -} diff --git a/internal/util/structwalk/structwalk.go b/internal/util/structwalk/structwalk.go deleted file mode 100644 index a1cce56948..0000000000 --- a/internal/util/structwalk/structwalk.go +++ /dev/null @@ -1,77 +0,0 @@ -// Package structwalk allows you to "walk" the hierarchy of a struct. It is -// very similar to github.com/mitchellh/reflectwalk but allows you to change -// the visitor mid-walk. -package structwalk - -import ( - "reflect" - - "github.com/mitchellh/reflectwalk" -) - -// Walk traverses the hierarchy of o in depth-first order. It starts by calling -// v.Visit(o). If the visitor w returned by v.Visit(o) is not nil, Walk is -// invoked recursively with visitor w for each of the structs inside of o, -// followed by a call to w.Visit(nil). -// -// o must be non-nil. -func Walk(v Visitor, o interface{}) { - sw := structWalker{v: v} - _ = reflectwalk.Walk(o, &sw) -} - -// Visitor will have its Visit method invoked for each struct value encountered -// by Walk. If w returned from Visit is non-nil, Walk will then visit each child -// of value with w. The final call after visiting all children will be to -// w.Visit(nil). -type Visitor interface { - Visit(value interface{}) (w Visitor) -} - -type structWalker struct { - cur interface{} - v Visitor -} - -// Struct invoke the Visitor for v and its children. -func (sw *structWalker) Struct(v reflect.Value) error { - // structWalker will walk absolutely all fields, even unexported fields or - // types. 
We can only interface exported fields, so we need to abort early - // for anything that's not supported. - if !v.CanInterface() { - return nil - } - - // Get the interface to the value. reflectwalk will fully derefernce all - // structs, so if it's possible for us to get address it into a pointer, - // we will use that for visiting. - var ( - rawValue = v.Interface() - ptrValue = rawValue - ) - if v.Kind() != reflect.Ptr && v.CanAddr() { - ptrValue = v.Addr().Interface() - } - - // Struct will recursively call reflectwalk.Walk with a new walker, which - // means that sw.Struct will be called twice for the same value. We want - // to ignore calls to Struct with the same value so we don't recurse - // infinitely. - if sw.cur != nil && reflect.DeepEqual(rawValue, sw.cur) { - return nil - } - - // Visit our struct and create a new walker with the returned Visitor. - w := sw.v.Visit(ptrValue) - if w == nil { - return reflectwalk.SkipEntry - } - _ = reflectwalk.Walk(rawValue, &structWalker{cur: rawValue, v: w}) - w.Visit(nil) - - return reflectwalk.SkipEntry -} - -func (sw *structWalker) StructField(reflect.StructField, reflect.Value) error { - return nil -} diff --git a/internal/util/structwalk/structwalk_test.go b/internal/util/structwalk/structwalk_test.go deleted file mode 100644 index 44d1263f22..0000000000 --- a/internal/util/structwalk/structwalk_test.go +++ /dev/null @@ -1,63 +0,0 @@ -package structwalk - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -type LevelA struct { - Field1 bool - Field2 string - Field3 int - Nested LevelB -} - -type LevelB struct { - Level1 bool - Level2 string - Field3 int - Nested LevelC -} - -type LevelC struct { - Level1 bool - Level2 string - Field3 int -} - -func TestWalk(t *testing.T) { - var ( - iteration int - fv FuncVisitor - ) - fv = func(val interface{}) Visitor { - iteration++ - - // After visiting all 3 structs, should receive a w.Visit(nil) for each level - if iteration >= 4 { - require.Nil(t, val) - return nil - } - - switch iteration { - case 1: - require.IsType(t, LevelA{}, val) - case 2: - require.IsType(t, LevelB{}, val) - case 3: - require.IsType(t, LevelC{}, val) - default: - require.FailNow(t, "unexpected iteration") - } - - return fv - } - - var val LevelA - Walk(fv, val) -} - -type FuncVisitor func(v interface{}) Visitor - -func (fv FuncVisitor) Visit(v interface{}) Visitor { return fv(v) } diff --git a/internal/util/subset/subset.go b/internal/util/subset/subset.go deleted file mode 100644 index 6f6561b2ed..0000000000 --- a/internal/util/subset/subset.go +++ /dev/null @@ -1,120 +0,0 @@ -// Package subset implements functions to check if one value is a subset of -// another. -package subset - -import ( - "fmt" - "reflect" - - "gopkg.in/yaml.v2" -) - -// Assert checks whether target is a subset of source. source and target must -// be the same type. target is a subset of source when: -// -// - If target and source are slices or arrays, then target must have the same -// number of elements as source. Each element in target must be a subset of -// the corresponding element from source. -// -// - If target and source are maps, each key in source must exist in target. -// The value for each element in target must be a subset of the corresponding -// element from source. -// -// - Otherwise, target and source must be deeply equal. -// -// An instance of Error will be returned when target is not a subset of source. 
-// -// Subset checking is primarily useful when doing things like YAML assertions, -// where you only want to ensure that a subset of YAML is defined as expected. -func Assert(source, target interface{}) error { - return assert(reflect.ValueOf(source), reflect.ValueOf(target)) -} - -func assert(source, target reflect.Value) error { - // Deference interface/pointers for direct comparison - for canElem(source) { - source = source.Elem() - } - for canElem(target) { - target = target.Elem() - } - - if source.Type() != target.Type() { - return &Error{Message: fmt.Sprintf("type mismatch: %T != %T", source.Interface(), target.Interface())} - } - - switch source.Kind() { - case reflect.Slice, reflect.Array: - if source.Len() != target.Len() { - return &Error{Message: fmt.Sprintf("length mismatch: %d != %d", source.Len(), target.Len())} - } - for i := 0; i < source.Len(); i++ { - if err := assert(source.Index(i), target.Index(i)); err != nil { - return &Error{ - Message: fmt.Sprintf("element %d", i), - Inner: err, - } - } - } - return nil - - case reflect.Map: - iter := source.MapRange() - for iter.Next() { - var ( - sourceElement = iter.Value() - targetElement = target.MapIndex(iter.Key()) - ) - if !targetElement.IsValid() { - return &Error{Message: fmt.Sprintf("missing key %v", iter.Key().Interface())} - } - if err := assert(sourceElement, targetElement); err != nil { - return &Error{ - Message: fmt.Sprintf("%v", iter.Key().Interface()), - Inner: err, - } - } - } - return nil - - default: - if !reflect.DeepEqual(source.Interface(), target.Interface()) { - return &Error{Message: fmt.Sprintf("%v != %v", source, target)} - } - return nil - } -} - -func canElem(v reflect.Value) bool { - return v.Kind() == reflect.Interface || v.Kind() == reflect.Ptr -} - -// Error is a subset assertion error. -type Error struct { - Message string // Message of the error - Inner error // Optional inner error -} - -// Error implements error. -func (e *Error) Error() string { - if e.Inner == nil { - return e.Message - } - return fmt.Sprintf("%s: %s", e.Message, e.Inner) -} - -// Unwrap returns the inner error, if set. -func (e *Error) Unwrap() error { return e.Inner } - -// YAMLAssert is like Assert but accepts YAML bytes as input. 
-func YAMLAssert(source, target []byte) error { - var sourceValue interface{} - if err := yaml.Unmarshal(source, &sourceValue); err != nil { - return err - } - var targetValue interface{} - if err := yaml.Unmarshal(target, &targetValue); err != nil { - return err - } - return Assert(sourceValue, targetValue) -} diff --git a/internal/util/subset/subset_test.go b/internal/util/subset/subset_test.go deleted file mode 100644 index a44441dd26..0000000000 --- a/internal/util/subset/subset_test.go +++ /dev/null @@ -1,92 +0,0 @@ -package subset - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestAssert(t *testing.T) { - tt := []struct { - name string - source, target string - expect string - }{ - // Plain values - { - name: "values match", - source: `true`, - target: `true`, - expect: "", - }, - { - name: "values mismatch", - source: `true`, - target: `false`, - expect: "true != false", - }, - { - name: "type mismatch", - source: `true`, - target: `5`, - expect: "type mismatch: bool != int", - }, - - // Arrays - { - name: "arrays match", - source: `[1, 2, 3]`, - target: `[1, 2, 3]`, - expect: "", - }, - { - name: "arrays mismatch", - source: `[1, 2, 3]`, - target: `[1, 2, 4]`, - expect: "element 2: 3 != 4", - }, - { - name: "array element type mismatch", - source: `[1, 2, 3]`, - target: `[1, 2, true]`, - expect: "element 2: type mismatch: int != bool", - }, - - // Maps - { - name: "maps match", - source: `{"hello": "world"}`, - target: `{"hello": "world"}`, - expect: "", - }, - { - name: "maps mismatch", - source: `{"hello": "world", "year": 2000}`, - target: `{"hello": "world", "year": 2001}`, - expect: "year: 2000 != 2001", - }, - { - name: "maps subset", - source: `{"hello": "world"}`, - target: `{"hello": "world", "year": 2001}`, - expect: "", - }, - { - name: "maps type mismatch", - source: `{"hello": "world", "year": 2000}`, - target: `{"hello": "world", "year": "yes"}`, - expect: "year: type mismatch: int != string", - }, - } - - for _, tc := range tt { - t.Run(tc.name, func(t *testing.T) { - err := YAMLAssert([]byte(tc.source), []byte(tc.target)) - if tc.expect == "" { - require.NoError(t, err) - } else { - require.EqualError(t, err, tc.expect) - } - }) - } -} diff --git a/internal/util/unregisterer.go b/internal/util/unregisterer.go deleted file mode 100644 index 822132b017..0000000000 --- a/internal/util/unregisterer.go +++ /dev/null @@ -1,63 +0,0 @@ -package util - -import "github.com/prometheus/client_golang/prometheus" - -// Unregisterer is a Prometheus Registerer that can unregister all collectors -// passed to it. -type Unregisterer struct { - wrap prometheus.Registerer - cs map[prometheus.Collector]struct{} -} - -// WrapWithUnregisterer wraps a prometheus Registerer with capabilities to -// unregister all collectors. -func WrapWithUnregisterer(reg prometheus.Registerer) *Unregisterer { - return &Unregisterer{ - wrap: reg, - cs: make(map[prometheus.Collector]struct{}), - } -} - -// Register implements prometheus.Registerer. -func (u *Unregisterer) Register(c prometheus.Collector) error { - if u.wrap == nil { - return nil - } - - err := u.wrap.Register(c) - if err != nil { - return err - } - u.cs[c] = struct{}{} - return nil -} - -// MustRegister implements prometheus.Registerer. -func (u *Unregisterer) MustRegister(cs ...prometheus.Collector) { - for _, c := range cs { - if err := u.Register(c); err != nil { - panic(err) - } - } -} - -// Unregister implements prometheus.Registerer. 
-func (u *Unregisterer) Unregister(c prometheus.Collector) bool { - if u.wrap != nil && u.wrap.Unregister(c) { - delete(u.cs, c) - return true - } - return false -} - -// UnregisterAll unregisters all collectors that were registered through the -// Registerer. -func (u *Unregisterer) UnregisterAll() bool { - success := true - for c := range u.cs { - if !u.Unregister(c) { - success = false - } - } - return success -}