From ec54bd70ea89193fe7d732e0cbcd43ddf779a795 Mon Sep 17 00:00:00 2001
From: Karel Suta
Date: Fri, 26 Jul 2024 15:34:11 +0200
Subject: [PATCH] Raise Training operator timeout to 60 minutes

---
 tests/kfto/core/kfto_kueue_sft_GPU_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/kfto/core/kfto_kueue_sft_GPU_test.go b/tests/kfto/core/kfto_kueue_sft_GPU_test.go
index 7a1e2f85..7c5dcc97 100644
--- a/tests/kfto/core/kfto_kueue_sft_GPU_test.go
+++ b/tests/kfto/core/kfto_kueue_sft_GPU_test.go
@@ -98,7 +98,7 @@ func runMultiGpuPytorchjob(t *testing.T, modelConfigFile string) {
 	if IsOpenShift(test) {
 		// Check that GPUs were utilized recently
 		// That itself doesn't guarantee that PyTorchJob generated the load in GPU, but is the best we can achieve for now
-		test.Eventually(openShiftPrometheusGpuUtil(test, namespace), 30*time.Minute).
+		test.Eventually(openShiftPrometheusGpuUtil(test, namespace), 60*time.Minute).
 			Should(
 				And(
 					HaveLen(numberOfGpus),
@@ -111,7 +111,7 @@ func runMultiGpuPytorchjob(t *testing.T, modelConfigFile string) {
 	}
 
 	// Make sure the PyTorch job succeed
-	test.Eventually(PytorchJob(test, namespace, tuningJob.Name), 30*time.Minute).Should(WithTransform(PytorchJobConditionSucceeded, Equal(corev1.ConditionTrue)))
+	test.Eventually(PytorchJob(test, namespace, tuningJob.Name), 60*time.Minute).Should(WithTransform(PytorchJobConditionSucceeded, Equal(corev1.ConditionTrue)))
 	test.T().Logf("PytorchJob %s/%s ran successfully", tuningJob.Namespace, tuningJob.Name)
 }