Skip to content

Commit

Permalink
update model training defaults
Browse files (browse the repository at this point in the history)
  • Loading branch information
svandenhaute committed Jul 28, 2024
1 parent f869923 commit cf25b07
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions psiflow/execution.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -247,13 +247,16 @@ def __init__(
gpu=True,
max_training_time: Optional[float] = None,
env_vars: Optional[dict[str, str]] = None,
multigpu: bool = False,
**kwargs,
) -> None:
super().__init__(gpu=gpu, **kwargs)
assert self.gpu
if max_training_time is not None:
assert max_training_time * 60 < self.max_runtime
self.max_training_time = max_training_time
if self.max_workers > 1:
self.multigpu = multigpu
if self.multigpu:
message = (
"the max_training_time keyword does not work "
"in combination with multi-gpu training. Adjust "
Expand All @@ -266,7 +269,7 @@ def __init__(
"OMP_NUM_THREADS": str(self.cores_per_worker),
"KMP_AFFINITY": "granularity=fine,compact,1,0",
"KMP_BLOCKTIME": "1",
"OMP_PROC_BIND": "spread",
"OMP_PROC_BIND": "spread", # different from Model Eval
"PYTHONUNBUFFERED": "TRUE",
}
if env_vars is None:
Expand All @@ -288,9 +291,9 @@ def wq_resources(self):
if self.use_threadpool:
return {}
resource_specification = {}
if self.gpu:

if self.multigpu:
nworkers = int(self.cores_available / self.cores_per_worker)
resource_specification["gpus"] = nworkers # one per GPU
else:
nworkers = 1

Expand Down

0 comments on commit cf25b07

Please sign in to comment.