From 497212fc86f855647ee11762986a39c93d570f51 Mon Sep 17 00:00:00 2001
From: Tiffany Hrabusa <30397949+tiffany76@users.noreply.github.com>
Date: Wed, 17 Apr 2024 08:37:42 -0400
Subject: [PATCH] Unify internal observability documentation - 1 of 3 (#4246)

---
 .../en/docs/collector/internal-telemetry.md  | 115 ++++++++++++++++++
 content/en/docs/collector/troubleshooting.md |   6 +
 static/refcache.json                         |   8 ++
 3 files changed, 129 insertions(+)
 create mode 100644 content/en/docs/collector/internal-telemetry.md

diff --git a/content/en/docs/collector/internal-telemetry.md b/content/en/docs/collector/internal-telemetry.md
new file mode 100644
index 000000000000..9405568798a1
--- /dev/null
+++ b/content/en/docs/collector/internal-telemetry.md
@@ -0,0 +1,115 @@
+---
+title: Internal telemetry
+weight: 25
+cSpell:ignore: journalctl kube otecol pprof tracez zpages
+---
+
+You can monitor the health of any OpenTelemetry Collector instance by checking
+its own internal telemetry. Read on to learn how to configure this telemetry to
+help you [troubleshoot](/docs/collector/troubleshooting/) Collector issues.
+
+## Activate internal telemetry in the Collector
+
+By default, the Collector exposes its own telemetry in two ways:
+
+- Internal [metrics](#configure-internal-metrics) are exposed using a Prometheus
+  interface which defaults to port `8888`.
+- [Logs](#configure-internal-logs) are emitted to `stderr` by default.
+
+### Configure internal metrics
+
+You can configure how internal metrics are generated and exposed by the
+Collector. By default, the Collector generates basic metrics about itself and
+exposes them for scraping at `http://127.0.0.1:8888/metrics`. You can expose the
+endpoint to one specific or all network interfaces when needed. For
+containerized environments, you might want to expose this port on a public
+interface.
+
+Set the address in the config `service::telemetry::metrics`:
+
+```yaml
+service:
+  telemetry:
+    metrics:
+      address: '0.0.0.0:8888'
+```
+
+You can enhance the metrics telemetry level using the `level` field. The
+following is a list of all possible values and their explanations.
+
+- `none` indicates that no telemetry data should be collected.
+- `basic` is the recommended value and covers the basics of the service
+  telemetry.
+- `normal` adds other indicators on top of basic.
+- `detailed` adds dimensions and views to the previous levels.
+
+For example:
+
+```yaml
+service:
+  telemetry:
+    metrics:
+      level: detailed
+      address: ':8888'
+```
+
+The Collector can also be configured to scrape its own metrics and send them
+through configured pipelines. For example:
+
+```yaml
+receivers:
+  prometheus:
+    config:
+      scrape_configs:
+        - job_name: 'otelcol'
+          scrape_interval: 10s
+          static_configs:
+            - targets: ['0.0.0.0:8888']
+          metric_relabel_configs:
+            - source_labels: [__name__]
+              regex: '.*grpc_io.*'
+              action: drop
exporters:
+  debug:
+service:
+  pipelines:
+    metrics:
+      receivers: [prometheus]
+      exporters: [debug]
+```
+
+{{% alert title="Caution" color="warning" %}}
+
+Self-monitoring is a risky practice. If an issue arises, the source of the
+problem is unclear and the telemetry is unreliable.
+
+{{% /alert %}}
+
+### Configure internal logs
+
+You can find log output in `stderr`. The verbosity level for logs defaults to
+`INFO`, but you can adjust it in the config `service::telemetry::logs`:
+
+```yaml
+service:
+  telemetry:
+    logs:
+      level: 'debug'
+```
+
+You can also see logs for the Collector on a Linux systemd system using
+`journalctl`:
+
+{{< tabpane text=true >}} {{% tab "All logs" %}}
+
+```sh
+journalctl | grep otelcol
+```
+
+{{% /tab %}} {{% tab "Errors only" %}}
+
+```sh
+journalctl | grep otelcol | grep Error
+```
+
+{{% /tab %}} {{< /tabpane >}}
diff --git a/content/en/docs/collector/troubleshooting.md b/content/en/docs/collector/troubleshooting.md
index d83b7cdb821e..8278d00b678b 100644
--- a/content/en/docs/collector/troubleshooting.md
+++ b/content/en/docs/collector/troubleshooting.md
@@ -8,6 +8,12 @@ This page describes some options when troubleshooting the health or
 performance of the OpenTelemetry Collector. The Collector provides a variety of
 metrics, logs, and extensions for debugging issues.
 
+## Internal telemetry
+
+You can configure and use the Collector's own
+[internal telemetry](/docs/collector/internal-telemetry/) to monitor its
+performance.
+
 ## Sending test data
 
 For certain types of issues, particularly verifying configuration and debugging
diff --git a/static/refcache.json b/static/refcache.json
index 50fe2909b266..b7577c80c695 100644
--- a/static/refcache.json
+++ b/static/refcache.json
@@ -3079,6 +3079,10 @@
     "StatusCode": 200,
     "LastSeen": "2024-01-18T19:36:56.082576-05:00"
   },
+  "https://github.com/open-telemetry/opentelemetry-collector/issues/7532": {
+    "StatusCode": 200,
+    "LastSeen": "2024-04-04T11:07:15.276911438-07:00"
+  },
   "https://github.com/open-telemetry/opentelemetry-collector/pull/6140": {
     "StatusCode": 200,
     "LastSeen": "2024-01-30T05:18:24.402543-05:00"
@@ -4523,6 +4527,10 @@
     "StatusCode": 200,
     "LastSeen": "2024-04-12T20:40:33.435682362Z"
   },
+  "https://grafana.com/grafana/dashboards/15983-opentelemetry-collector/": {
+    "StatusCode": 200,
+    "LastSeen": "2024-04-10T15:11:30.311778613-07:00"
+  },
   "https://grafana.com/oss/opentelemetry/": {
     "StatusCode": 200,
     "LastSeen": "2024-01-18T08:52:48.999991-05:00"