Skip to content

Commit

Permalink
Merge pull request EESSI#128 from Crivella/feature-QEpw_test
Browse files Browse the repository at this point in the history
Added test for QE `pw.x`
  • Loading branch information
casparvl authored May 29, 2024
2 parents 452cbc0 + 58a716d commit b0c91e4
Show file tree
Hide file tree
Showing 8 changed files with 333 additions and 43 deletions.
8 changes: 7 additions & 1 deletion config/github_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,13 @@
'options': ['--mem={size}'],
}
],
'max_jobs': 1
'max_jobs': 1,
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
# This is a fictional amount, GH actions probably has less, but only does --dry-run
'mem_per_node': 30 # in GiB
},
}
]
}
Expand Down
5 changes: 5 additions & 0 deletions config/it4i_karolina.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@
'features': [
FEATURES[CPU],
] + list(SCALES.keys()),
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 219.345 # in GiB
},
'descr': 'CPU Universal Compute Nodes, see https://docs.it4i.cz/karolina/hardware-overview/'
},
# We don't have GPU budget on Karolina at this time
Expand Down
88 changes: 49 additions & 39 deletions config/izum_vega.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,47 +59,57 @@
'features': [
FEATURES[CPU],
] + list(SCALES.keys()),
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 238.418 # in GiB
},
'descr': 'CPU partition Standard, see https://en-doc.vega.izum.si/architecture/'
},
{
'name': 'gpu',
'scheduler': 'slurm',
'prepare_cmds': [
'source %s' % common_eessi_init(),
# Pass job environment variables like $PATH, etc., into job steps
'export SLURM_EXPORT_ENV=ALL',
# Needed when using srun launcher
# 'export SLURM_MPI_TYPE=pmix', # WARNING: this broke the GROMACS on Vega
# Avoid https://github.com/EESSI/software-layer/issues/136
# Can be taken out once we don't care about old OpenMPI versions anymore (pre-4.1.1)
'export OMPI_MCA_pml=ucx',
],
'launcher': 'mpirun',
# Use --export=None to avoid that login environment is passed down to submitted jobs
'access': ['-p gpu', '--export=None'],
'environs': ['default'],
'max_jobs': 60,
'devices': [
{
'type': DEVICE_TYPES[GPU],
'num_devices': 4,
}
],
'resources': [
{
'name': '_rfm_gpu',
'options': ['--gpus-per-node={num_gpus_per_node}'],
},
{
'name': 'memory',
'options': ['--mem={size}'],
}
],
'features': [
FEATURES[GPU],
] + list(SCALES.keys()),
'descr': 'GPU partition, see https://en-doc.vega.izum.si/architecture/'
},
# {
# 'name': 'gpu',
# 'scheduler': 'slurm',
# 'prepare_cmds': [
# 'source %s' % common_eessi_init(),
# # Pass job environment variables like $PATH, etc., into job steps
# 'export SLURM_EXPORT_ENV=ALL',
# # Needed when using srun launcher
# # 'export SLURM_MPI_TYPE=pmix', # WARNING: this broke the GROMACS on Vega
# # Avoid https://github.com/EESSI/software-layer/issues/136
# # Can be taken out once we don't care about old OpenMPI versions anymore (pre-4.1.1)
# 'export OMPI_MCA_pml=ucx',
# ],
# 'launcher': 'mpirun',
# # Use --export=None to avoid that login environment is passed down to submitted jobs
# 'access': ['-p gpu', '--export=None'],
# 'environs': ['default'],
# 'max_jobs': 60,
# 'devices': [
# {
# 'type': DEVICE_TYPES[GPU],
# 'num_devices': 4,
# }
# ],
# 'resources': [
# {
# 'name': '_rfm_gpu',
# 'options': ['--gpus-per-node={num_gpus_per_node}'],
# },
# {
# 'name': 'memory',
# 'options': ['--mem={size}'],
# }
# ],
# 'features': [
# FEATURES[GPU],
# ] + list(SCALES.keys()),
# 'extras': {
# # Make sure to round down, otherwise a job might ask for more mem than is available
# # per node
# 'mem_per_node': 476.837 # in GiB (should be checked, it's unclear from slurm.conf)
# },
# 'descr': 'GPU partition, see https://en-doc.vega.izum.si/architecture/'
# },
]
},
],
Expand Down
13 changes: 13 additions & 0 deletions config/surf_snellius.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@
'features': [
FEATURES[CPU],
] + list(SCALES.keys()),
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 213.623 # in GiB
},
'descr': 'AMD Rome CPU partition with native EESSI stack'
},
{
Expand All @@ -72,6 +77,11 @@
'features': [
FEATURES[CPU],
] + list(SCALES.keys()),
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 320.434 # in GiB
},
'descr': 'AMD Genoa CPU partition with native EESSI stack'
},

Expand Down Expand Up @@ -105,6 +115,9 @@
] + valid_scales_snellius_gpu,
'extras': {
GPU_VENDOR: GPU_VENDORS[NVIDIA],
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 457.763 # in GiB
},
'descr': 'Nvidia A100 GPU partition with native EESSI stack'
},
Expand Down
25 changes: 24 additions & 1 deletion config/vsc_hortense.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
from reframe.core.backends import register_launcher
from reframe.core.launchers import JobLauncher

from eessi.testsuite.common_config import common_logging_config, common_general_config, common_eessi_init
from eessi.testsuite.common_config import (common_eessi_init,
common_general_config,
common_logging_config)
from eessi.testsuite.constants import * # noqa: F403

account = "my-slurm-account"
Expand Down Expand Up @@ -54,6 +56,11 @@ def command(self, job):
'features': [
FEATURES[CPU],
] + list(SCALES.keys()),
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 256.000 # in GiB (should be checked, it's unclear from slurm.conf)
},
},
{
'name': 'cpu_rome_512gb',
Expand Down Expand Up @@ -81,6 +88,11 @@ def command(self, job):
'features': [
FEATURES[CPU],
] + list(SCALES.keys()),
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 511.983 # in GiB
},
},
{
'name': 'cpu_milan',
Expand Down Expand Up @@ -108,6 +120,11 @@ def command(self, job):
'features': [
FEATURES[CPU],
] + list(SCALES.keys()),
'extras': {
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 256.000 # in GiB (should be checked, it's unclear from slurm.conf)
},
},
{
'name': 'gpu_rome_a100_40gb',
Expand All @@ -131,6 +148,9 @@ def command(self, job):
] + list(SCALES.keys()),
'extras': {
GPU_VENDOR: GPU_VENDORS[NVIDIA],
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 256.000 # in GiB
},
'resources': [
{
Expand Down Expand Up @@ -172,6 +192,9 @@ def command(self, job):
] + list(SCALES.keys()),
'extras': {
GPU_VENDOR: GPU_VENDORS[NVIDIA],
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 511.983 # in GiB
},
'resources': [
{
Expand Down
88 changes: 87 additions & 1 deletion eessi/testsuite/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from eessi.testsuite.constants import *
from eessi.testsuite.utils import (get_max_avail_gpus_per_node, is_cuda_required_module, log,
check_proc_attribute_defined)
check_proc_attribute_defined, check_extras_key_defined)


def _assign_default_num_cpus_per_node(test: rfm.RegressionTest):
Expand Down Expand Up @@ -383,6 +383,92 @@ def filter_valid_systems_by_device_type(test: rfm.RegressionTest, required_devic
log(f'valid_systems set to {test.valid_systems}')


def req_memory_per_node(test: rfm.RegressionTest, app_mem_req):
    """
    Ask the batch scheduler for an explicit amount of memory (per node) for this test.

    Two candidate amounts are computed, and the larger of the two is requested:
    - the memory the application itself says it needs per node (app_mem_req);
    - the share of the node memory that is proportional to the fraction of the node's CPU cores the test uses.

    If the partition's configured memory per node is smaller than app_mem_req, the test is skipped.

    Requirement: the ReFrame configuration has to define system.partition.extras['mem_per_node'], in GiB.

    Arguments:
    - test: the ReFrame test this hook is applied to
    - app_mem_req: memory needed by the application (per node), in GiB

    Example 1:
    - Nodes have 128 cores and 64 GiB of memory
    - The test runs on 64 cores
    - app_mem_req is 40 (GiB)
    Half of the cores are used, so the proportional memory is 32 GiB. app_mem_req is larger, hence 40 GiB
    (per node) is requested.

    Example 2:
    - Nodes have 128 cores and 128 GiB of memory
    - The test runs on 64 cores
    - app_mem_req is 40 (GiB)
    Half of the cores are used, so the proportional memory is 64 GiB. That exceeds app_mem_req, hence 64 GiB
    (per node) is requested.
    """
    # The ReFrame config must provide systems.partitions.extras['mem_per_node'] for this hook to work
    check_extras_key_defined(test, 'mem_per_node')
    node_mem = test.current_partition.extras['mem_per_node']

    # Do not run on partitions whose nodes cannot hold the application
    skip_msg = (
        f"Skipping test: nodes in this partition only have {node_mem} GiB"
        " memory available (per node) accodring to the current ReFrame configuration,"
        f" but {app_mem_req} GiB is needed"
    )
    test.skip_if(node_mem < app_mem_req, skip_msg)

    # Memory (in GiB) that corresponds to the fraction of the node's cores this test occupies
    check_proc_attribute_defined(test, 'num_cpus')
    cpu_fraction = test.num_tasks_per_node * test.num_cpus_per_task / test.current_partition.processor.num_cpus
    proportional_mem = cpu_fraction * node_mem

    scheduler_name = test.current_partition.scheduler.registered_name

    if scheduler_name in ('slurm', 'squeue'):
        # SLURM's --mem is a per-node amount, see https://slurm.schedmd.com/sbatch.html
        # The conversion treats SLURM's 'M' suffix as base-10 megabytes, i.e. GiB -> MB uses
        # 1024 * 1024 * 1024 / (1000 * 1000) = 1073.741824
        # NOTE(review): confirm whether SLURM interprets 'M' as base-10 or base-2 on the target systems
        gib_to_mb = 1073.741824
        app_mem_mb = math.ceil(gib_to_mb * app_mem_req)
        log(f"Memory requested by application: {app_mem_mb} MB")
        proportional_mem_mb = math.floor(gib_to_mb * proportional_mem)
        log(f"Memory proportional to the core count: {proportional_mem_mb} MB")

        # The scheduler gets the larger of the proportional amount and the application's requirement
        req_mem_per_node = max(proportional_mem_mb, app_mem_mb)

        test.extra_resources = {'memory': {'size': f'{req_mem_per_node}M'}}
        log(f"Requested {req_mem_per_node} MB per node from the SLURM batch scheduler")
        return

    if scheduler_name == 'torque':
        # Torque/moab needs --pmem (--mem only works on a single node, so it does not generalize)
        # See https://docs.adaptivecomputing.com/10-0-1/Torque/torque.htm#topics/torque/3-jobs/3.1.3-requestingRes.htm
        # The documentation states the units are MiB, so GiB values are multiplied by 1024
        # -pmem is memory _per process_: divide by the number of tasks per node before rounding
        tasks_per_node = test.num_tasks_per_node
        app_mem_req_task = math.ceil(1024 * app_mem_req / tasks_per_node)
        proportional_mem_task = math.floor(1024 * proportional_mem / tasks_per_node)

        # The scheduler gets the larger of the proportional amount and the application's requirement
        req_mem_per_task = max(proportional_mem_task, app_mem_req_task)

        # This assumes the ReFrame config maps the extra resource 'memory' onto pmem,
        # i.e. 'options': ['--pmem={size}']
        test.extra_resources = {'memory': {'size': f'{req_mem_per_task}mb'}}
        log(f"Requested {req_mem_per_task} MiB per task from the torque batch scheduler")
        return

    # Any other scheduler: no memory request is made, only a warning is emitted.
    # At the default loglevel, warnings are printed on stdout when running the ReFrame command
    warn_msg = (
        "hooks.req_memory_per_node does not support the scheduler you configured"
        f" ({scheduler_name})."
        " The test will run, but since it doesn't request the required amount of memory explicitely,"
        " it may result in an out-of-memory error."
        " Please expand the functionality of hooks.req_memory_per_node for your scheduler."
    )
    rflog.getlogger().warning(warn_msg)


def set_modules(test: rfm.RegressionTest):
"""
Skip current test if module_name is not among a list of modules,
Expand Down
Loading

0 comments on commit b0c91e4

Please sign in to comment.