From 7f24b9acd189e12e1289d47f7dc6fe0dfffbcbcc Mon Sep 17 00:00:00 2001
From: Aiden Grossman
Date: Mon, 27 Jan 2025 08:05:05 -0800
Subject: [PATCH] [CI] Support multiple jobs in metrics container (#124457)

This patch makes the metrics script support multiple jobs in a single
workflow. This is needed so that we do not crash on an assertion now
that the Windows job has been enabled within the premerge workflow.
---
 .ci/metrics/metrics.py | 79 ++++++++++++++++++++----------------------
 1 file changed, 38 insertions(+), 41 deletions(-)

diff --git a/.ci/metrics/metrics.py b/.ci/metrics/metrics.py
index 48d2aa2f330ec..70b787665a8b9 100644
--- a/.ci/metrics/metrics.py
+++ b/.ci/metrics/metrics.py
@@ -130,34 +130,6 @@ def get_per_workflow_metrics(
         workflow_jobs = workflow_run.jobs()
         if workflow_jobs.totalCount == 0:
             continue
-        if workflow_jobs.totalCount > 1:
-            raise ValueError(
-                f"Encountered an unexpected number of jobs: {workflow_jobs.totalCount}"
-            )
-
-        created_at = workflow_jobs[0].created_at
-        started_at = workflow_jobs[0].started_at
-        completed_at = workflow_jobs[0].completed_at
-
-        job_result = int(workflow_jobs[0].conclusion == "success")
-        if job_result:
-            # We still might want to mark the job as a failure if one of the steps
-            # failed. This is required due to use setting continue-on-error in
-            # the premerge pipeline to prevent sending emails while we are
-            # testing the infrastructure.
-            # TODO(boomanaiden154): Remove this once the premerge pipeline is no
-            # longer in a testing state and we can directly assert the workflow
-            # result.
-            for step in workflow_jobs[0].steps:
-                if step.conclusion != "success":
-                    job_result = 0
-                    break
-
-        queue_time = started_at - created_at
-        run_time = completed_at - started_at
-
-        if run_time.seconds == 0:
-            continue
 
         if (
             workflows_to_track[workflow_run.name] is None
@@ -170,20 +142,45 @@ def get_per_workflow_metrics(
         ):
             break
 
-        # The timestamp associated with the event is expected by Grafana to be
-        # in nanoseconds.
-        created_at_ns = int(created_at.timestamp()) * 10**9
-
-        workflow_metrics.append(
-            JobMetrics(
-                workflow_run.name,
-                queue_time.seconds,
-                run_time.seconds,
-                job_result,
-                created_at_ns,
-                workflow_run.id,
+        for workflow_job in workflow_jobs:
+            created_at = workflow_job.created_at
+            started_at = workflow_job.started_at
+            completed_at = workflow_job.completed_at
+
+            job_result = int(workflow_job.conclusion == "success")
+            if job_result:
+                # We still might want to mark the job as a failure if one of the steps
+                # failed. This is required due to us setting continue-on-error in
+                # the premerge pipeline to prevent sending emails while we are
+                # testing the infrastructure.
+                # TODO(boomanaiden154): Remove this once the premerge pipeline is no
+                # longer in a testing state and we can directly assert the workflow
+                # result.
+                for step in workflow_job.steps:
+                    if step.conclusion != "success":
+                        job_result = 0
+                        break
+
+            queue_time = started_at - created_at
+            run_time = completed_at - started_at
+
+            if run_time.seconds == 0:
+                continue
+
+            # The timestamp associated with the event is expected by Grafana to be
+            # in nanoseconds.
+            created_at_ns = int(created_at.timestamp()) * 10**9
+
+            workflow_metrics.append(
+                JobMetrics(
+                    workflow_run.name + "-" + workflow_job.name,
+                    queue_time.seconds,
+                    run_time.seconds,
+                    job_result,
+                    created_at_ns,
+                    workflow_run.id,
+                )
             )
-        )
 
     return workflow_metrics
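
For context on the new control flow, the per-job loop that the second hunk
introduces can be sketched in isolation. The sketch below is an
approximation, not the script itself: the Job and Step classes and the
job_metrics_for_run function are hypothetical stand-ins for the PyGithub
job objects and the JobMetrics record that .ci/metrics/metrics.py actually
uses; only the fields the script reads are modeled.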
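
# Minimal sketch of the per-job metrics loop added by this patch.
# Job, Step, and job_metrics_for_run are hypothetical stand-ins, not the
# real metrics.py API.
from dataclasses import dataclass
from datetime import datetime
from typing import List


@dataclass
class Step:
    conclusion: str


@dataclass
class Job:
    name: str
    created_at: datetime
    started_at: datetime
    completed_at: datetime
    conclusion: str
    steps: List[Step]


def job_metrics_for_run(workflow_name: str, jobs: List[Job]) -> list:
    """Collect one metrics tuple per job, mirroring the loop in this patch."""
    metrics = []
    for job in jobs:
        # A job reported as "success" may still contain failed steps because
        # the premerge pipeline sets continue-on-error; count any non-success
        # step as an overall failure.
        job_result = int(job.conclusion == "success")
        if job_result and any(s.conclusion != "success" for s in job.steps):
            job_result = 0

        queue_time = job.started_at - job.created_at
        run_time = job.completed_at - job.started_at
        # Skip jobs with a zero-second run time, as the real script does.
        if run_time.seconds == 0:
            continue

        # Grafana expects the event timestamp in nanoseconds.
        created_at_ns = int(job.created_at.timestamp()) * 10**9
        # Key each entry on "<workflow>-<job>" so that metrics stay distinct
        # now that a workflow run can contain more than one job.
        metrics.append(
            (
                workflow_name + "-" + job.name,
                queue_time.seconds,
                run_time.seconds,
                job_result,
                created_at_ns,
            )
        )
    return metrics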
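
The design choice the sketch mirrors is the metric name: concatenating
workflow_run.name and workflow_job.name gives each job its own metric
identity (for example, separate premerge entries for the Linux and Windows
jobs, assuming those are the job names) without changing the JobMetrics
schema itself.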