From 89902d883256803b948b75737d6860f50196d134 Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Wed, 12 Feb 2025 13:16:13 +0100 Subject: [PATCH 01/11] Split auto-killing of stuck tests from rocm-tests PR --- parse_jenkins_builds.py | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/parse_jenkins_builds.py b/parse_jenkins_builds.py index 4d267eb04663..674b9d661604 100755 --- a/parse_jenkins_builds.py +++ b/parse_jenkins_builds.py @@ -5,6 +5,7 @@ import subprocess from es_utils import send_payload, get_payload, resend_payload, get_payload_wscroll from cmsutils import epoch2week +import urllib.request JENKINS_PREFIX = "jenkins" try: @@ -169,6 +170,67 @@ def grep(filename, pattern, verbose=False): payload["wait_time"] = current_time - queue_time payload["start_time"] = 0 + kill_index = 0 + + # Abort stuck rocm jobs + if ( + job_name in ("ib-run-pr-unittests", "ib-run-pr-relvals", "ib-run-baseline") + and reason.endswith("-offline") + and (payload["wait_time"] / 1000 / 60 > 60) + ): + params = element["params"].strip().split("\n") + main_params = "" + other_params = [] + context = "" + upload_unique_id = "" + pull_request = "" + commit = "" + + for _ in params: + k, v = _.split("=") + if k == "PULL_REQUEST": + main_params = _ + pull_request = v + else: + if k == "CONTEXT_PREFIX": + context = v + if k == "UPLOAD_UNIQ_ID": + upload_unique_id = v + + other_params.append(_) + + if "GPU_FLAVOR=rocm" in other_params or "TEST_FLAVOR=rocm" in other_params: + with open("abort-{0}.prop".format(kill_index), "w") as f: + f.write("JENKINS_PROJECT_TO_KILL={0}\n".format(job_name)) + f.write("JENKINS_PROJECT_PARAMS={0}\n".format(main_params)) + f.write("EXTRA_PARAMS={0}\n".format(";".join(other_params))) + + kill_index += 1 + + repository, pr = pull_request.split("#", 1) + + if upload_unique_id: + with urllib.request.urlopen( + "http://localhost/SDT/jenkins-artifacts/pull-request-integration/{0}/prs_commits.txt".format( + upload_unique_id + ) + ) as f: + commits = f.read().decode("ascii", "ignore").splitlines() + + for line in commits: + if line.startswith(pull_request): + commit = _.split("='", 1)[1] + break + + with open("commit-status-{0}.prop", "w") as f: + f.write("REPOSITORY={0}\n".format(repository)) + f.write("PULL_REQUEST={0}\n".format(commit)) + f.write("CONTEXT={0}\n".format(context)) + f.write("STATUS=error\n") + f.write("STATUS_MESSAGE=Timed out waiting for ROCm node\n") + + continue + unique_id = ( JENKINS_PREFIX + ":/build/builds/" + job_name + "/" + str(queue_id) ) # Not a real path From 07cca151debe53d9d28f965684cc5747ac7b5f23 Mon Sep 17 00:00:00 2001 From: iarspider Date: Thu, 13 Feb 2025 11:51:18 +0100 Subject: [PATCH 02/11] Update parse_jenkins_builds.py --- parse_jenkins_builds.py | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/parse_jenkins_builds.py b/parse_jenkins_builds.py index 674b9d661604..ec11b288808e 100755 --- a/parse_jenkins_builds.py +++ b/parse_jenkins_builds.py @@ -201,34 +201,14 @@ def grep(filename, pattern, verbose=False): if "GPU_FLAVOR=rocm" in other_params or "TEST_FLAVOR=rocm" in other_params: with open("abort-{0}.prop".format(kill_index), "w") as f: + f.write("UPLOAD_UNIQ_ID={0}\n".format(upload_unique_id)) + f.write("PULL_REQUEST={0}\n".format(pull_request)) + f.write("CONTEXT={0}\n".format(context)) f.write("JENKINS_PROJECT_TO_KILL={0}\n".format(job_name)) f.write("JENKINS_PROJECT_PARAMS={0}\n".format(main_params)) f.write("EXTRA_PARAMS={0}\n".format(";".join(other_params))) kill_index += 1 - - repository, pr = pull_request.split("#", 1) - - if upload_unique_id: - with urllib.request.urlopen( - "http://localhost/SDT/jenkins-artifacts/pull-request-integration/{0}/prs_commits.txt".format( - upload_unique_id - ) - ) as f: - commits = f.read().decode("ascii", "ignore").splitlines() - - for line in commits: - if line.startswith(pull_request): - commit = _.split("='", 1)[1] - break - - with open("commit-status-{0}.prop", "w") as f: - f.write("REPOSITORY={0}\n".format(repository)) - f.write("PULL_REQUEST={0}\n".format(commit)) - f.write("CONTEXT={0}\n".format(context)) - f.write("STATUS=error\n") - f.write("STATUS_MESSAGE=Timed out waiting for ROCm node\n") - continue unique_id = ( From 1b2805689d37f480796455742cc2aa3c60be3d40 Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Thu, 13 Feb 2025 16:23:15 +0100 Subject: [PATCH 03/11] Add config --- parse_jenkins_builds.json | 5 +++++ parse_jenkins_builds.py | 31 ++++++++++++++++--------------- 2 files changed, 21 insertions(+), 15 deletions(-) create mode 100644 parse_jenkins_builds.json diff --git a/parse_jenkins_builds.json b/parse_jenkins_builds.json new file mode 100644 index 000000000000..848f89cd53e4 --- /dev/null +++ b/parse_jenkins_builds.json @@ -0,0 +1,5 @@ +{ +"whitelist": ["ib-run-pr-unittests", "ib-run-pr-relvals", "ib-run-baseline"], +"timeout": 3600, +"custom": {} +} diff --git a/parse_jenkins_builds.py b/parse_jenkins_builds.py index ec11b288808e..813f068aabdb 100755 --- a/parse_jenkins_builds.py +++ b/parse_jenkins_builds.py @@ -5,7 +5,7 @@ import subprocess from es_utils import send_payload, get_payload, resend_payload, get_payload_wscroll from cmsutils import epoch2week -import urllib.request +import json JENKINS_PREFIX = "jenkins" try: @@ -172,11 +172,14 @@ def grep(filename, pattern, verbose=False): kill_index = 0 - # Abort stuck rocm jobs + with open("parse_jenkins_builds.json") as f: + config = json.load(f) + + # Abort stuck jobs if ( - job_name in ("ib-run-pr-unittests", "ib-run-pr-relvals", "ib-run-baseline") + job_name in config["whitelist"] and reason.endswith("-offline") - and (payload["wait_time"] / 1000 / 60 > 60) + and (payload["wait_time"] / 1000 > config["custom"].get(job_name, config["timeout"])) ): params = element["params"].strip().split("\n") main_params = "" @@ -199,17 +202,15 @@ def grep(filename, pattern, verbose=False): other_params.append(_) - if "GPU_FLAVOR=rocm" in other_params or "TEST_FLAVOR=rocm" in other_params: - with open("abort-{0}.prop".format(kill_index), "w") as f: - f.write("UPLOAD_UNIQ_ID={0}\n".format(upload_unique_id)) - f.write("PULL_REQUEST={0}\n".format(pull_request)) - f.write("CONTEXT={0}\n".format(context)) - f.write("JENKINS_PROJECT_TO_KILL={0}\n".format(job_name)) - f.write("JENKINS_PROJECT_PARAMS={0}\n".format(main_params)) - f.write("EXTRA_PARAMS={0}\n".format(";".join(other_params))) - - kill_index += 1 - continue + with open("abort-{0}.prop".format(kill_index), "w") as f: + f.write("UPLOAD_UNIQ_ID={0}\n".format(upload_unique_id)) + f.write("PULL_REQUEST={0}\n".format(pull_request)) + f.write("CONTEXT={0}\n".format(context)) + f.write("JENKINS_PROJECT_TO_KILL={0}\n".format(job_name)) + f.write("JENKINS_PROJECT_PARAMS={0}\n".format(main_params)) + f.write("EXTRA_PARAMS={0}\n".format(";".join(other_params))) + + kill_index += 1 unique_id = ( JENKINS_PREFIX + ":/build/builds/" + job_name + "/" + str(queue_id) From 482c1734c36086a601b47ed0e7f237bfc2591f64 Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Mon, 17 Feb 2025 15:21:42 +0100 Subject: [PATCH 04/11] Collect commit statuses to update --- common/github_reports.sh | 3 ++- update-commit-statues-matching.py | 43 +++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100755 update-commit-statues-matching.py diff --git a/common/github_reports.sh b/common/github_reports.sh index 90424159683f..e5ae1ca44ae6 100644 --- a/common/github_reports.sh +++ b/common/github_reports.sh @@ -22,7 +22,8 @@ function mark_commit_status_all_prs () { if [ $(echo $CMSSW_QUEUE | grep '_X' | wc -l) -gt 0 ] ; then CMSSW_FLAVOR=$(echo $CMSSW_QUEUE | cut -d_ -f4) fi - if [ "${CMSSW_FLAVOR}" != "X" ] ; then CONTEXT="${CMSSW_FLAVOR}/${CONTEXT}" ; fi + if [ "${CMSSW_FLAVOR}" != "X" ] ; then CMSSW_FLAVOR="default" ; fi + CONTEXT="${CMSSW_FLAVOR}/${CONTEXT}" if [ "$1" != "" ] ; then CONTEXT="${CONTEXT}/$1" ; fi else CONTEXT="${COMMIT_STATUS_CONTEXT}" diff --git a/update-commit-statues-matching.py b/update-commit-statues-matching.py new file mode 100755 index 000000000000..c1553dded102 --- /dev/null +++ b/update-commit-statues-matching.py @@ -0,0 +1,43 @@ +import github_utils +import argparse + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--repository", "-r") + parser.add_argument("--commit", "-c") + parser.add_argument("--architecture", "-a") + parser.add_argument("--queue", "-u", required=False) + parser.add_argument("--prefix", "-p") + args = parser.parse_args() + + status_suffix = args.architecture + if args.queue: + flavor = args.queue.split("_")[3] + if flavor == "X": + flavor = "default" + + status_suffix = flavor + "/" + status_suffix + + all_statuses = github_utils.get_combined_statuses(args.commit, args.repository).get( + "statuses", [] + ) + index = 0 + + for status in all_statuses: + if ( + status["context"].startswith(args.prefix + "/" + status_suffix) + and status["state"] == "pending" + ): + with open("update-pr-status-{0}.prop".format(index), "w") as f: + f.write("REPOSITORY={0}\n".format(args.repository)) + f.write("PULL_REQUEST={0}\n".format(args.commit)) + f.write("CONTEXT={0}\n".format(status["context"])) + f.write("STATUS=error\n") + f.write("STATUS_MESSAGE=Stuck due to all nodes being offline\n") + + index = index + 1 + + +if __name__ == "__main__": + main() From f62842a9d1c89c8e02d774e478ad143249488210 Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Mon, 17 Feb 2025 16:36:47 +0100 Subject: [PATCH 05/11] Fix style --- update-commit-statues-matching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/update-commit-statues-matching.py b/update-commit-statues-matching.py index c1553dded102..2141c7f87b94 100755 --- a/update-commit-statues-matching.py +++ b/update-commit-statues-matching.py @@ -16,7 +16,7 @@ def main(): flavor = args.queue.split("_")[3] if flavor == "X": flavor = "default" - + status_suffix = flavor + "/" + status_suffix all_statuses = github_utils.get_combined_statuses(args.commit, args.repository).get( From 8466b861314d31e276a532e51af94973167509fe Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Tue, 18 Feb 2025 12:14:00 +0100 Subject: [PATCH 06/11] Simplify update-commit-statues-matching --- parse_jenkins_builds.py | 8 ++++++-- update-commit-statues-matching.py | 15 +++------------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/parse_jenkins_builds.py b/parse_jenkins_builds.py index 813f068aabdb..cf0edc358074 100755 --- a/parse_jenkins_builds.py +++ b/parse_jenkins_builds.py @@ -188,6 +188,7 @@ def grep(filename, pattern, verbose=False): upload_unique_id = "" pull_request = "" commit = "" + release = "" for _ in params: k, v = _.split("=") @@ -195,9 +196,11 @@ def grep(filename, pattern, verbose=False): main_params = _ pull_request = v else: - if k == "CONTEXT_PREFIX": + if k == "RELEASE_FORMAT": + release = v + elif k == "CONTEXT_PREFIX": context = v - if k == "UPLOAD_UNIQ_ID": + elif k == "UPLOAD_UNIQ_ID": upload_unique_id = v other_params.append(_) @@ -209,6 +212,7 @@ def grep(filename, pattern, verbose=False): f.write("JENKINS_PROJECT_TO_KILL={0}\n".format(job_name)) f.write("JENKINS_PROJECT_PARAMS={0}\n".format(main_params)) f.write("EXTRA_PARAMS={0}\n".format(";".join(other_params))) + f.write("RELEASE_FORMAT={0}\n".format(release)) kill_index += 1 diff --git a/update-commit-statues-matching.py b/update-commit-statues-matching.py index 2141c7f87b94..c0fa17376ac6 100755 --- a/update-commit-statues-matching.py +++ b/update-commit-statues-matching.py @@ -7,17 +7,11 @@ def main(): parser.add_argument("--repository", "-r") parser.add_argument("--commit", "-c") parser.add_argument("--architecture", "-a") - parser.add_argument("--queue", "-u", required=False) + parser.add_argument("--queue", "-u") parser.add_argument("--prefix", "-p") args = parser.parse_args() - status_suffix = args.architecture - if args.queue: - flavor = args.queue.split("_")[3] - if flavor == "X": - flavor = "default" - - status_suffix = flavor + "/" + status_suffix + status_prefix = f"{args.prefix}/{args.architecture}/{args.queue}/" all_statuses = github_utils.get_combined_statuses(args.commit, args.repository).get( "statuses", [] @@ -25,10 +19,7 @@ def main(): index = 0 for status in all_statuses: - if ( - status["context"].startswith(args.prefix + "/" + status_suffix) - and status["state"] == "pending" - ): + if status["context"].startswith(status_prefix) and status["state"] == "pending": with open("update-pr-status-{0}.prop".format(index), "w") as f: f.write("REPOSITORY={0}\n".format(args.repository)) f.write("PULL_REQUEST={0}\n".format(args.commit)) From c9695ee0cd92c1d287fb27b581b8504872f615c4 Mon Sep 17 00:00:00 2001 From: iarspider Date: Tue, 18 Feb 2025 18:15:25 +0100 Subject: [PATCH 07/11] Mark step as "success" even if it wasn't run --- update-commit-statues-matching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/update-commit-statues-matching.py b/update-commit-statues-matching.py index c0fa17376ac6..2b97cc32b5bc 100755 --- a/update-commit-statues-matching.py +++ b/update-commit-statues-matching.py @@ -24,7 +24,7 @@ def main(): f.write("REPOSITORY={0}\n".format(args.repository)) f.write("PULL_REQUEST={0}\n".format(args.commit)) f.write("CONTEXT={0}\n".format(status["context"])) - f.write("STATUS=error\n") + f.write("STATUS=success\n") f.write("STATUS_MESSAGE=Stuck due to all nodes being offline\n") index = index + 1 From 92f651ed2381edac3f44bf92fe4fa903cf174e7c Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Wed, 19 Feb 2025 11:43:19 +0100 Subject: [PATCH 08/11] Changes from review --- common/github_reports.sh | 3 +- parse_jenkins_builds.py | 78 ++++++++++++++----------------- update-commit-statues-matching.py | 12 +++-- 3 files changed, 44 insertions(+), 49 deletions(-) diff --git a/common/github_reports.sh b/common/github_reports.sh index e5ae1ca44ae6..90424159683f 100644 --- a/common/github_reports.sh +++ b/common/github_reports.sh @@ -22,8 +22,7 @@ function mark_commit_status_all_prs () { if [ $(echo $CMSSW_QUEUE | grep '_X' | wc -l) -gt 0 ] ; then CMSSW_FLAVOR=$(echo $CMSSW_QUEUE | cut -d_ -f4) fi - if [ "${CMSSW_FLAVOR}" != "X" ] ; then CMSSW_FLAVOR="default" ; fi - CONTEXT="${CMSSW_FLAVOR}/${CONTEXT}" + if [ "${CMSSW_FLAVOR}" != "X" ] ; then CONTEXT="${CMSSW_FLAVOR}/${CONTEXT}" ; fi if [ "$1" != "" ] ; then CONTEXT="${CONTEXT}/$1" ; fi else CONTEXT="${COMMIT_STATUS_CONTEXT}" diff --git a/parse_jenkins_builds.py b/parse_jenkins_builds.py index cf0edc358074..13e8f7fcbebf 100755 --- a/parse_jenkins_builds.py +++ b/parse_jenkins_builds.py @@ -153,22 +153,22 @@ def grep(filename, pattern, verbose=False): jenkins_queue = dict() current_time = get_current_time() for element in queue_json["items"]: - payload = dict() - job_name = element["task"]["name"] queue_id = int(element["id"]) queue_time = int(element["inQueueSince"]) labels = element["why"].encode("ascii", "ignore").decode("ascii", "ignore") reason = process_queue_reason(labels) - payload["jenkins_server"] = JENKINS_PREFIX - payload["in_queue_since"] = queue_time - payload["queue_id"] = queue_id - payload["job_name"] = job_name - payload["node_labels"] = reason - payload["in_queue"] = 1 - payload["wait_time"] = current_time - queue_time - payload["start_time"] = 0 + payload = { + "jenkins_server": JENKINS_PREFIX, + "in_queue_since": queue_time, + "queue_id": queue_id, + "job_name": job_name, + "node_labels": reason, + "in_queue": 1, + "wait_time": current_time - queue_time, + "start_time": 0, + } kill_index = 0 @@ -181,38 +181,32 @@ def grep(filename, pattern, verbose=False): and reason.endswith("-offline") and (payload["wait_time"] / 1000 > config["custom"].get(job_name, config["timeout"])) ): - params = element["params"].strip().split("\n") - main_params = "" - other_params = [] - context = "" - upload_unique_id = "" - pull_request = "" - commit = "" - release = "" - - for _ in params: - k, v = _.split("=") - if k == "PULL_REQUEST": - main_params = _ - pull_request = v - else: - if k == "RELEASE_FORMAT": - release = v - elif k == "CONTEXT_PREFIX": - context = v - elif k == "UPLOAD_UNIQ_ID": - upload_unique_id = v - - other_params.append(_) - - with open("abort-{0}.prop".format(kill_index), "w") as f: - f.write("UPLOAD_UNIQ_ID={0}\n".format(upload_unique_id)) - f.write("PULL_REQUEST={0}\n".format(pull_request)) - f.write("CONTEXT={0}\n".format(context)) - f.write("JENKINS_PROJECT_TO_KILL={0}\n".format(job_name)) - f.write("JENKINS_PROJECT_PARAMS={0}\n".format(main_params)) - f.write("EXTRA_PARAMS={0}\n".format(";".join(other_params))) - f.write("RELEASE_FORMAT={0}\n".format(release)) + params = dict( + line.split("=", 1) for line in element["params"].strip().splitlines() if "=" in line + ) + + if "rocm" not in (params.get("GPU_FLAVOR"), params.get("TEST_FLAVOR")): + continue + + try: + pull_request = params["PULL_REQUEST"] + main_params = f"PULL_REQUEST={pull_request}" + release = params["RELEASE_FORMAT"] + context = params["CONTEXT_PREFIX"] + upload_unique_id = params["UPLOAD_UNIQ_ID"] + except KeyError: + continue + + other_params = ";".join(f"{k}={v}" for k, v in params if k != "PULL_REQUEST") + + with open(f"abort-{kill_index}.prop", "w") as f: + f.write(f"UPLOAD_UNIQ_ID={upload_unique_id}\n") + f.write(f"PULL_REQUEST={pull_request}\n") + f.write(f"CONTEXT={context}\n") + f.write(f"JENKINS_PROJECT_TO_KILL={job_name}\n") + f.write(f"JENKINS_PROJECT_PARAMS={main_params}\n") + f.write(f"EXTRA_PARAMS={other_params}\n") + f.write(f"RELEASE_FORMAT={release}\n") kill_index += 1 diff --git a/update-commit-statues-matching.py b/update-commit-statues-matching.py index 2b97cc32b5bc..04528530865a 100755 --- a/update-commit-statues-matching.py +++ b/update-commit-statues-matching.py @@ -6,12 +6,10 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--repository", "-r") parser.add_argument("--commit", "-c") - parser.add_argument("--architecture", "-a") - parser.add_argument("--queue", "-u") parser.add_argument("--prefix", "-p") args = parser.parse_args() - status_prefix = f"{args.prefix}/{args.architecture}/{args.queue}/" + status_prefix = f"{args.prefix}/" all_statuses = github_utils.get_combined_statuses(args.commit, args.repository).get( "statuses", [] @@ -19,13 +17,17 @@ def main(): index = 0 for status in all_statuses: - if status["context"].startswith(status_prefix) and status["state"] == "pending": + if ( + status["context"].startswith(status_prefix) + and status["context"].endswith("/rocm") + and status["state"] == "pending" + ): with open("update-pr-status-{0}.prop".format(index), "w") as f: f.write("REPOSITORY={0}\n".format(args.repository)) f.write("PULL_REQUEST={0}\n".format(args.commit)) f.write("CONTEXT={0}\n".format(status["context"])) f.write("STATUS=success\n") - f.write("STATUS_MESSAGE=Stuck due to all nodes being offline\n") + f.write("STATUS_MESSAGE=Timed out waiting for node\n") index = index + 1 From 9b63a3a71423306bd52935df92a92e9277ba809b Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Wed, 19 Feb 2025 15:47:24 +0100 Subject: [PATCH 09/11] Mark failure in status page --- kill-stuck-pr-test.sh | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100755 kill-stuck-pr-test.sh diff --git a/kill-stuck-pr-test.sh b/kill-stuck-pr-test.sh new file mode 100755 index 000000000000..ec1d3c625a83 --- /dev/null +++ b/kill-stuck-pr-test.sh @@ -0,0 +1,26 @@ +#!/bin/bash +rm -f *.prop + +if [ "X${UPLOAD_UNIQUE_ID}" = "X" ] ; then exit 0 ; fi +if [ "X${PULL_REQUEST}" = "X" ] ; then exit 0 ; fi + +REPOSITORY=$(echo ${PULL_REQUEST} | cut -d '#' -f 1) +PR_ID=$(echo ${PULL_REQUEST} | cut -d '#' -f 2) + +COMMIT_ID=$(curl -L http://localhost/SDT/jenkins-artifacts/pull-request-integration/${UPLOAD_UNIQUE_ID}/prs_commits.txt | grep "^${PULL_REQUEST}=") +if [ "X${COMMIT_ID}" = "X" ] ; then exit 0 ; fi + +./cms-bot/update-commit-statuses-matching.py -r ${REPOSITORY} -c ${COMMIT_ID} -p ${CONTEXT} + +touch abort-jenkins-job.prop +echo "JENKINS_PROJECT_TO_KILL=${JENKINS_PROJECT_TO_KILL}" >> abort-jenkins-job.prop +echo "JENKINS_PROJECT_PARAMS=${JENKINS_PROJECT_PARAMS}" >> abort-jenkins-job.prop +echo "EXTRA_PARAMS=${EXTRA_PARAMS}" >> abort-jenkins-job.prop + +source $(dirname $0)/setup-pr-test-env.sh + +echo "MATRIXROCM_TESTS;ERROR,Matrix ROCM Tests Outputs,Timed out waiting for node,none" > ${RESULTS_DIR}/relvalROCM.txt +echo "RelVals-ROCM" > ${RESULTS_DIR}/12ROCM-relvals-failed.res +echo "rocm_UNIT_TEST_RESULTS;ERROR,ROCM GPU Unit Tests,Timed out waiting for node,none" > ${RESULTS_DIR}/unittestrocm.txt +echo "rocmUnitTests" > ${RESULTS_DIR}/14-failed.res +prepare_upload_results \ No newline at end of file From 2c3d5085c53d5e97712d22351f73809b08ac58f8 Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Thu, 20 Feb 2025 12:21:13 +0100 Subject: [PATCH 10/11] Changes from review --- kill-stuck-pr-test.sh | 2 +- update-commit-statues-matching.py | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/kill-stuck-pr-test.sh b/kill-stuck-pr-test.sh index ec1d3c625a83..0ede63fa1890 100755 --- a/kill-stuck-pr-test.sh +++ b/kill-stuck-pr-test.sh @@ -10,7 +10,7 @@ PR_ID=$(echo ${PULL_REQUEST} | cut -d '#' -f 2) COMMIT_ID=$(curl -L http://localhost/SDT/jenkins-artifacts/pull-request-integration/${UPLOAD_UNIQUE_ID}/prs_commits.txt | grep "^${PULL_REQUEST}=") if [ "X${COMMIT_ID}" = "X" ] ; then exit 0 ; fi -./cms-bot/update-commit-statuses-matching.py -r ${REPOSITORY} -c ${COMMIT_ID} -p ${CONTEXT} +./cms-bot/update-commit-statuses-matching.py -r ${REPOSITORY} -c ${COMMIT_ID} -p ${CONTEXT} rocm touch abort-jenkins-job.prop echo "JENKINS_PROJECT_TO_KILL=${JENKINS_PROJECT_TO_KILL}" >> abort-jenkins-job.prop diff --git a/update-commit-statues-matching.py b/update-commit-statues-matching.py index 04528530865a..638e5f23676d 100755 --- a/update-commit-statues-matching.py +++ b/update-commit-statues-matching.py @@ -7,6 +7,7 @@ def main(): parser.add_argument("--repository", "-r") parser.add_argument("--commit", "-c") parser.add_argument("--prefix", "-p") + parser.add_argument("suffix") args = parser.parse_args() status_prefix = f"{args.prefix}/" @@ -14,22 +15,21 @@ def main(): all_statuses = github_utils.get_combined_statuses(args.commit, args.repository).get( "statuses", [] ) - index = 0 for status in all_statuses: if ( status["context"].startswith(status_prefix) - and status["context"].endswith("/rocm") + and status["context"].endswith(f"/{args.suffix}") and status["state"] == "pending" ): - with open("update-pr-status-{0}.prop".format(index), "w") as f: - f.write("REPOSITORY={0}\n".format(args.repository)) - f.write("PULL_REQUEST={0}\n".format(args.commit)) - f.write("CONTEXT={0}\n".format(status["context"])) - f.write("STATUS=success\n") - f.write("STATUS_MESSAGE=Timed out waiting for node\n") - - index = index + 1 + github_utils.mark_commit_status( + args.commit, + args.repository, + status["context"], + "success", + "", + "Timed out waiting for node", + ) if __name__ == "__main__": From a776bbbe0044f5f93c76a1e94054a1c5beb134cc Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Thu, 20 Feb 2025 13:36:54 +0100 Subject: [PATCH 11/11] Only select jobs with single node --- parse_jenkins_builds.py | 1 + 1 file changed, 1 insertion(+) diff --git a/parse_jenkins_builds.py b/parse_jenkins_builds.py index 13e8f7fcbebf..ed12630715b2 100755 --- a/parse_jenkins_builds.py +++ b/parse_jenkins_builds.py @@ -179,6 +179,7 @@ def grep(filename, pattern, verbose=False): if ( job_name in config["whitelist"] and reason.endswith("-offline") + and reason != "multiple-offline" and (payload["wait_time"] / 1000 > config["custom"].get(job_name, config["timeout"])) ): params = dict(