From cf25b0711180ac870859ec1dbdd7716d7c84d284 Mon Sep 17 00:00:00 2001
From: Sander Vandenhaute
Date: Sun, 28 Jul 2024 12:28:40 -0400
Subject: [PATCH] update model training defaults

---
 psiflow/execution.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/psiflow/execution.py b/psiflow/execution.py
index 6920cbc..db4174f 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -247,13 +247,16 @@ def __init__(
         gpu=True,
         max_training_time: Optional[float] = None,
         env_vars: Optional[dict[str, str]] = None,
+        multigpu: bool = False,
         **kwargs,
     ) -> None:
         super().__init__(gpu=gpu, **kwargs)
+        assert self.gpu
         if max_training_time is not None:
             assert max_training_time * 60 < self.max_runtime
         self.max_training_time = max_training_time
-        if self.max_workers > 1:
+        self.multigpu = multigpu
+        if self.multigpu:
             message = (
                 "the max_training_time keyword does not work "
                 "in combination with multi-gpu training. Adjust "
@@ -266,7 +269,7 @@ def __init__(
             "OMP_NUM_THREADS": str(self.cores_per_worker),
             "KMP_AFFINITY": "granularity=fine,compact,1,0",
             "KMP_BLOCKTIME": "1",
-            "OMP_PROC_BIND": "spread",
+            "OMP_PROC_BIND": "spread",  # different from Model Eval
             "PYTHONUNBUFFERED": "TRUE",
         }
         if env_vars is None:
@@ -288,9 +291,9 @@ def wq_resources(self):
         if self.use_threadpool:
             return {}
         resource_specification = {}
-        if self.gpu:
+
+        if self.multigpu:
             nworkers = int(self.cores_available / self.cores_per_worker)
-            resource_specification["gpus"] = nworkers  # one per GPU
         else:
             nworkers = 1
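
For clarity, the behavioral change in the last hunk can be illustrated with a
minimal standalone sketch: worker count is now keyed on the new multigpu flag
rather than on gpu, so plain GPU training always gets a single worker. The
helper name training_workers below is hypothetical (not part of the patch);
only the branch logic mirrors the patched wq_resources().

    # sketch only: mirrors the patched branch in wq_resources()
    def training_workers(multigpu: bool, cores_available: int, cores_per_worker: int) -> int:
        if multigpu:
            # multi-GPU training: one worker per GPU, cores split evenly
            return int(cores_available / cores_per_worker)
        # single-GPU training: exactly one worker, regardless of core count
        return 1

    # e.g. an 8-core node with 4 cores per worker yields two workers
    # under multigpu, but still only one without it
    assert training_workers(True, cores_available=8, cores_per_worker=4) == 2
    assert training_workers(False, cores_available=8, cores_per_worker=4) == 1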