Skip to content

Commit

Permalink
Merge pull request EESSI#160 from casparvl/hyperthread_support
Browse files Browse the repository at this point in the history
Change behavior for `assign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU])` on hyperthreading-enabled systems + use compat process binding in ESPResSo test
  • Loading branch information
boegel authored Jun 28, 2024
2 parents 8152585 + 09b4bc5 commit e0f7048
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 7 deletions.
2 changes: 2 additions & 0 deletions eessi/testsuite/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

AMD = 'AMD'
CI = 'CI'
HWTHREAD = 'HWTHREAD'
CPU = 'CPU'
CPU_SOCKET = 'CPU_SOCKET'
GPU = 'GPU'
Expand All @@ -19,6 +20,7 @@
}

COMPUTE_UNIT = {
HWTHREAD: 'hwthread',
CPU: 'cpu',
CPU_SOCKET: 'cpu_socket',
GPU: 'gpu',
Expand Down
70 changes: 63 additions & 7 deletions eessi/testsuite/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,19 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, n
Total task count is determined based on the number of nodes to be used in the test.
Behaviour of this function is (usually) sensible for MPI tests.
WARNING: when using COMPUTE_UNIT[HWTHREAD] and invoking a hook for process binding, please verify that process
binding happens correctly.
Arguments:
- test: the ReFrame test to which this hook should apply
- compute_unit: a device as listed in eessi.testsuite.constants.COMPUTE_UNIT
Examples:
On a single node with 2 sockets, 64 cores and 128 hyperthreads:
- assign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU]) will launch 64 tasks with 1 thread
- assign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU_SOCKET]) will launch 2 tasks with 32 threads per task
- assign_tasks_per_compute_unit(test, COMPUTE_UNIT[HWTHREAD]) will launch 128 tasks with 1 thread per task
- assign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU]) will launch 64 tasks with 2 threads per task
- assign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU_SOCKET]) will launch 2 tasks with 64 threads per task
Future work:
Currently, on a single node with 2 sockets, 64 cores and 128 hyperthreads, this
- assign_one_task_per_compute_unit(test, COMPUTE_UNIT[CPU], true) launches 128 tasks with 1 thread
- assign_one_task_per_compute_unit(test, COMPUTE_UNIT[CPU_SOCKET], true) launches 2 tasks with 64 threads per task
In the future, we'd like to add an argument that disables spawning tasks for hyperthreads.
"""
if num_per != 1 and compute_unit in [COMPUTE_UNIT[GPU], COMPUTE_UNIT[CPU], COMPUTE_UNIT[CPU_SOCKET]]:
raise NotImplementedError(
Expand All @@ -100,12 +99,26 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, n
)

_assign_default_num_cpus_per_node(test)
# If on
# - a hyperthreading system
# - num_cpus_per_node was set by the scale
# - compute_unit != COMPUTE_UNIT[HWTHREAD]
# double the default_num_cpus_per_node. In this scenario, if the scale asks for e.g. 1 num_cpus_per_node and
# the test doesn't state it wants to use hwthreads, we want to launch on two hyperthreads, i.e. one physical core
if SCALES[test.scale].get('num_cpus_per_node') is not None and compute_unit != COMPUTE_UNIT[HWTHREAD]:
check_proc_attribute_defined(test, 'num_cpus_per_core')
num_cpus_per_core = test.current_partition.processor.num_cpus_per_core
# On a hyperthreading system?
if num_cpus_per_core > 1:
test.default_num_cpus_per_node = test.default_num_cpus_per_node * num_cpus_per_core

if FEATURES[GPU] in test.current_partition.features:
_assign_default_num_gpus_per_node(test)

if compute_unit == COMPUTE_UNIT[GPU]:
_assign_one_task_per_gpu(test)
elif compute_unit == COMPUTE_UNIT[HWTHREAD]:
_assign_one_task_per_hwthread(test)
elif compute_unit == COMPUTE_UNIT[CPU]:
_assign_one_task_per_cpu(test)
elif compute_unit == COMPUTE_UNIT[CPU_SOCKET]:
Expand Down Expand Up @@ -223,6 +236,44 @@ def _assign_one_task_per_cpu(test: rfm.RegressionTest):
--setvar num_tasks_per_node=<x> and/or
--setvar num_cpus_per_task=<y>.
Default resources requested:
- num_tasks_per_node = default_num_cpus_per_node / num_cpus_per_core
- num_cpus_per_task = default_num_cpus_per_node / num_tasks_per_node
"""
# neither num_tasks_per_node nor num_cpus_per_task are set
if not test.num_tasks_per_node and not test.num_cpus_per_task:
check_proc_attribute_defined(test, 'num_cpus_per_core')
test.num_tasks_per_node = max(
int(test.default_num_cpus_per_node / test.current_partition.processor.num_cpus_per_core),
1
)
test.num_cpus_per_task = int(test.default_num_cpus_per_node / test.num_tasks_per_node)

# num_tasks_per_node is not set, but num_cpus_per_task is
elif not test.num_tasks_per_node:
test.num_tasks_per_node = int(test.default_num_cpus_per_node / test.num_cpus_per_task)

# num_cpus_per_task is not set, but num_tasks_per_node is
elif not test.num_cpus_per_task:
test.num_cpus_per_task = int(test.default_num_cpus_per_node / test.num_tasks_per_node)

else:
pass # both num_tasks_per_node and num_cpus_per_node are already set

test.num_tasks = test.num_nodes * test.num_tasks_per_node

log(f'num_tasks_per_node set to {test.num_tasks_per_node}')
log(f'num_cpus_per_task set to {test.num_cpus_per_task}')
log(f'num_tasks set to {test.num_tasks}')


def _assign_one_task_per_hwthread(test: rfm.RegressionTest):
"""
Sets num_tasks_per_node and num_cpus_per_task such that it will run one task per hardware thread,
unless specified with:
--setvar num_tasks_per_node=<x> and/or
--setvar num_cpus_per_task=<y>.
Default resources requested:
- num_tasks_per_node = default_num_cpus_per_node
- num_cpus_per_task = default_num_cpus_per_node / num_tasks_per_node
Expand Down Expand Up @@ -508,6 +559,10 @@ def set_compact_process_binding(test: rfm.RegressionTest):
This hook sets a binding policy for process binding.
More specifically, it will bind each process to subsequent domains of test.num_cpus_per_task cores.
Arguments:
- test: the ReFrame test to which this hook should apply
A few examples:
- Pure MPI (test.num_cpus_per_task = 1) will result in binding 1 process to each core.
this will happen in a compact way, i.e. rank 0 to core 0, rank 1 to core 1, etc
Expand All @@ -522,6 +577,7 @@ def set_compact_process_binding(test: rfm.RegressionTest):

# Check if hyperthreading is enabled. If so, divide the number of cpus per task by the number
# of hw threads per core to get a physical core count
# TODO: check if this also leads to sensible binding when using COMPUTE_UNIT[HWTHREAD]
check_proc_attribute_defined(test, 'num_cpus_per_core')
num_cpus_per_core = test.current_partition.processor.num_cpus_per_core
physical_cpus_per_task = int(test.num_cpus_per_task / num_cpus_per_core)
Expand Down
4 changes: 4 additions & 0 deletions eessi/testsuite/tests/apps/espresso/espresso.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ def set_mem(self):
mem_required_per_node = self.num_tasks_per_node * 0.9
hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node * 1024)

@run_after('setup')
def set_binding(self):
    """Bind processes compactly via the EESSI hook (runs after ReFrame's setup phase)."""
    # Delegates to hooks.set_compact_process_binding, which binds each rank to
    # subsequent domains of num_cpus_per_task cores (see hooks.py in this commit).
    hooks.set_compact_process_binding(self)

@deferrable
def assert_completion(self):
'''Check completion'''
Expand Down

0 comments on commit e0f7048

Please sign in to comment.