From 1af03eb55ce51a376c3df2dc0cdf3c81738b2dd6 Mon Sep 17 00:00:00 2001 From: Mike Sarahan Date: Tue, 17 Dec 2024 14:45:48 -0600 Subject: [PATCH] remove certs and simplify telemetry summarize (#1750) The goal here is to remove the need for certificates. Any worker that is in our VPC can talk directly to fluentbit, and fluentbit will be configured with certificates to talk to Tempo. The implementation implication is that we need to run telemetry stuff ONLY on nodes in our VPC. To avoid needing to move all jobs to these nodes, we instead temporarily store telemetry data as artifacts, and in one final job, we process and send telemetry info for all jobs from one job. Part of https://github.com/rapidsai/shared-workflows/pull/269 and https://github.com/rapidsai/shared-actions/pull/28 Authors: - Mike Sarahan (https://github.com/msarahan) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/rmm/pull/1750 --- .github/workflows/pr.yaml | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 3ecd52c7e..e71c8cf64 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -19,11 +19,11 @@ jobs: - conda-python-build - conda-python-tests - docs-build - - telemetry-setup - wheel-build-cpp - wheel-build-python - wheel-tests - devcontainer + - telemetry-setup secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() @@ -33,9 +33,11 @@ jobs: runs-on: ubuntu-latest continue-on-error: true env: - OTEL_SERVICE_NAME: "pr-rmm" + OTEL_SERVICE_NAME: "pr-rmm" steps: - name: Telemetry setup + # This gate is here and not at the job level because we need the job to not be skipped, + # since other jobs depend on it. 
if: ${{ vars.TELEMETRY_ENABLED == 'true' }} uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main changed-files: @@ -141,16 +143,11 @@ jobs: sccache -s; telemetry-summarize: - runs-on: ubuntu-latest + # This job must use a self-hosted runner to record telemetry traces. + runs-on: linux-amd64-cpu4 needs: pr-builder if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }} continue-on-error: true steps: - - name: Load stashed telemetry env vars - uses: rapidsai/shared-actions/telemetry-dispatch-load-base-env-vars@main - with: - load_service_name: true - name: Telemetry summarize - uses: rapidsai/shared-actions/telemetry-dispatch-write-summary@main - with: - cert_concat: "${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}" + uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main