From 5454737458f0a892d308b93dec1dd5e94c28f86c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Tue, 2 May 2023 10:46:03 +0100 Subject: [PATCH 01/61] [feat] Parse convertible mapping objects with `json` module The advantage is that we can automatically convert nested mappings, like for example the `extra_resources` attribute, which can thus be set from the command line. --- reframe/utility/typecheck.py | 33 ++++++++++++++++++++------------- unittests/test_typecheck.py | 8 ++++++++ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/reframe/utility/typecheck.py b/reframe/utility/typecheck.py index 9f27234e88..36bf67a6b2 100644 --- a/reframe/utility/typecheck.py +++ b/reframe/utility/typecheck.py @@ -95,6 +95,7 @@ import abc import datetime +import json import re @@ -322,19 +323,25 @@ def __rfm_cast_str__(cls, s): mappping_type = cls._type key_type = cls._key_type value_type = cls._value_type - seq = [] - for key_datum in s.split(','): - try: - k, v = key_datum.split(':') - except ValueError: - # Re-raise as TypeError - raise TypeError( - f'cannot convert string {s!r} to {cls.__name__!r}' - ) from None - - seq.append((key_type(k), value_type(v))) - - return mappping_type(seq) + + try: + d = json.loads(s) + except json.JSONDecodeError: + seq = [] + for key_datum in s.split(','): + try: + k, v = key_datum.split(':') + except ValueError: + # Re-raise as TypeError + raise TypeError( + f'cannot convert string {s!r} to {cls.__name__!r}' + ) from None + + seq.append((key_type(k), value_type(v))) + + return mappping_type(seq) + else: + return mappping_type(d) class _StrType(_SequenceType): diff --git a/unittests/test_typecheck.py b/unittests/test_typecheck.py index f640a9d7df..c081f7722f 100644 --- a/unittests/test_typecheck.py +++ b/unittests/test_typecheck.py @@ -208,6 +208,14 @@ def test_mapping_type(): # Test conversions assert typ.Dict[str, int]('a:1,b:2') == {'a': 1, 'b': 2} + # Conversion with JSON syntax, for nested dictionaries + s = 
'{"gpu":{"num_gpus_per_node":8}, "mpi": {"num_slots": 64}}' + assert (typ.Dict[str, typ.Dict[str, object]](s) == + { + "gpu": {"num_gpus_per_node": 8}, + "mpi": {"num_slots": 64}, + }) + with pytest.raises(TypeError): typ.Dict[str, int]('a:1,b') From 6e3b636a6672236474433f7a0a4058e43d67b094 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 1 Jul 2023 15:41:56 +0000 Subject: [PATCH 02/61] Bump semver from 2.13.0 to 3.0.1 Bumps [semver](https://github.com/python-semver/python-semver) from 2.13.0 to 3.0.1. - [Release notes](https://github.com/python-semver/python-semver/releases) - [Changelog](https://github.com/python-semver/python-semver/blob/master/CHANGELOG.rst) - [Commits](https://github.com/python-semver/python-semver/compare/2.13.0...3.0.1) --- updated-dependencies: - dependency-name: semver dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- docs/requirements.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 40025d2384..079acee9a2 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,6 @@ archspec==0.2.1 docutils==0.17.1 # https://github.com/sphinx-doc/sphinx/issues/9001 jsonschema==3.2.0 -semver==3.0.0 +semver==3.0.1 Sphinx==5.3.0 sphinx-rtd-theme==1.2.1 diff --git a/requirements.txt b/requirements.txt index c3c116435c..f68b8c6f49 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ PyYAML==6.0 requests==2.27.1; python_version == '3.6' requests==2.31.0; python_version >= '3.7' semver==2.13.0; python_version == '3.6' -semver==3.0.0; python_version >= '3.7' +semver==3.0.1; python_version >= '3.7' setuptools==59.6.0; python_version == '3.6' setuptools==67.8.0; python_version >= '3.7' wcwidth==0.2.6 From 0f98f2fb6689a3137067fb96a5b7de324ad831b2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 2 Jul 2023 20:41:16 +0000 Subject: [PATCH 03/61] Bump argcomplete from 3.0.8 to 3.1.1 Bumps [argcomplete](https://github.com/kislyuk/argcomplete) from 3.0.8 to 3.1.1. - [Release notes](https://github.com/kislyuk/argcomplete/releases) - [Changelog](https://github.com/kislyuk/argcomplete/blob/develop/Changes.rst) - [Commits](https://github.com/kislyuk/argcomplete/compare/v3.0.8...v3.1.1) --- updated-dependencies: - dependency-name: argcomplete dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f68b8c6f49..d352d6d8f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ archspec==0.2.1 -argcomplete==3.0.8 +argcomplete==3.1.1 importlib_metadata==4.0.1; python_version < '3.8' jsonschema==3.2.0 lxml==4.9.2 From 31379ec4b1c98b37a1bd9dbc8cd5348f81f8f7c8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 2 Jul 2023 21:34:25 +0000 Subject: [PATCH 04/61] Bump setuptools from 59.6.0 to 68.0.0 Bumps [setuptools](https://github.com/pypa/setuptools) from 59.6.0 to 68.0.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v59.6.0...v68.0.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d352d6d8f5..9469b49d56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,6 +15,6 @@ requests==2.31.0; python_version >= '3.7' semver==2.13.0; python_version == '3.6' semver==3.0.1; python_version >= '3.7' setuptools==59.6.0; python_version == '3.6' -setuptools==67.8.0; python_version >= '3.7' +setuptools==68.0.0; python_version >= '3.7' wcwidth==0.2.6 #+pygelf%pygelf==0.4.0 From 3ffb618c549d2cafb3ed08a5b1fe700ca3ce59c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Jul 2023 09:47:05 +0000 Subject: [PATCH 05/61] Bump sphinx-rtd-theme from 1.2.1 to 1.2.2 Bumps [sphinx-rtd-theme](https://github.com/readthedocs/sphinx_rtd_theme) from 1.2.1 to 1.2.2. - [Changelog](https://github.com/readthedocs/sphinx_rtd_theme/blob/master/docs/changelog.rst) - [Commits](https://github.com/readthedocs/sphinx_rtd_theme/compare/1.2.1...1.2.2) --- updated-dependencies: - dependency-name: sphinx-rtd-theme dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 079acee9a2..93623c819c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,4 +3,4 @@ docutils==0.17.1 # https://github.com/sphinx-doc/sphinx/issues/9001 jsonschema==3.2.0 semver==3.0.1 Sphinx==5.3.0 -sphinx-rtd-theme==1.2.1 +sphinx-rtd-theme==1.2.2 From 5b0c6b56e8b478b91c08fdc84a83d93f0f24dbbd Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 3 Jul 2023 23:05:21 +0300 Subject: [PATCH 06/61] Style fixes --- reframe/utility/typecheck.py | 10 ++++------ unittests/test_typecheck.py | 12 ++++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/reframe/utility/typecheck.py b/reframe/utility/typecheck.py index 36bf67a6b2..e09bc503be 100644 --- a/reframe/utility/typecheck.py +++ b/reframe/utility/typecheck.py @@ -325,9 +325,9 @@ def __rfm_cast_str__(cls, s): value_type = cls._value_type try: - d = json.loads(s) + items = json.loads(s) except json.JSONDecodeError: - seq = [] + items = [] for key_datum in s.split(','): try: k, v = key_datum.split(':') @@ -337,11 +337,9 @@ def __rfm_cast_str__(cls, s): f'cannot convert string {s!r} to {cls.__name__!r}' ) from None - seq.append((key_type(k), value_type(v))) + items.append((key_type(k), value_type(v))) - return mappping_type(seq) - else: - return mappping_type(d) + return mappping_type(items) class _StrType(_SequenceType): diff --git a/unittests/test_typecheck.py b/unittests/test_typecheck.py index c081f7722f..79342d37d1 100644 --- a/unittests/test_typecheck.py +++ b/unittests/test_typecheck.py @@ -209,12 +209,12 @@ def test_mapping_type(): assert typ.Dict[str, int]('a:1,b:2') == {'a': 1, 'b': 2} # Conversion with JSON syntax, for nested dictionaries - s = '{"gpu":{"num_gpus_per_node":8}, "mpi": {"num_slots": 64}}' - assert (typ.Dict[str, typ.Dict[str, object]](s) == - { - "gpu": {"num_gpus_per_node": 8}, - 
"mpi": {"num_slots": 64}, - }) + s = '{"gpu":{"num_gpus_per_node": 8}, "mpi": {"num_slots": 64}}' + expected = { + 'gpu': {'num_gpus_per_node': 8}, + 'mpi': {'num_slots': 64}, + } + assert typ.Dict[str, typ.Dict[str, object]](s) == expected with pytest.raises(TypeError): typ.Dict[str, int]('a:1,b') From e90208c880fb6495e69860753f3606ecccb247ba Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 3 Jul 2023 23:13:36 +0300 Subject: [PATCH 07/61] Update docs --- docs/manpage.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/manpage.rst b/docs/manpage.rst index 2320c26f2f..1f8b5c1e65 100644 --- a/docs/manpage.rst +++ b/docs/manpage.rst @@ -632,6 +632,9 @@ Options controlling ReFrame execution - Sequence types: ``-S seqvar=1,2,3,4`` - Mapping types: ``-S mapvar=a:1,b:2,c:3`` + Nested mapping types can also be converted using JSON syntax. + For example, the :attr:`~reframe.core.pipeline.RegressionTest.extra_resources` complex dictionary could be set with ``-S extra_resources='{"gpu": {"num_gpus_per_node":8}}'``. + Conversions to arbitrary objects are also supported. See :class:`~reframe.utility.typecheck.ConvertibleType` for more details. From 1dfd632cb881cc60bbdf11f9f4c59b6b865e4dcc Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 3 Jul 2023 23:16:02 +0300 Subject: [PATCH 08/61] Add version annotation in docs --- docs/manpage.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/manpage.rst b/docs/manpage.rst index 1f8b5c1e65..703b51d79f 100644 --- a/docs/manpage.rst +++ b/docs/manpage.rst @@ -690,6 +690,9 @@ Options controlling ReFrame execution Allow setting variables in fixtures. + .. versionchanged:: 4.4 + + Allow setting nested mapping types using JSON syntax. .. 
option:: --skip-performance-check From 53ee447060d0660536cc25ee5eae7d033815fc39 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 3 Jul 2023 23:17:47 +0300 Subject: [PATCH 09/61] Bump dev version --- reframe/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/__init__.py b/reframe/__init__.py index 0f140ebfce..35d1051326 100644 --- a/reframe/__init__.py +++ b/reframe/__init__.py @@ -6,7 +6,7 @@ import os import sys -VERSION = '4.3.0' +VERSION = '4.4.0-dev.0' INSTALL_PREFIX = os.path.normpath( os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) ) From 7fe90f3869f0103065b40422f02f70f7cdbe75f9 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 14 Jul 2023 12:39:49 +0300 Subject: [PATCH 10/61] Support for custom schedulers --- reframe/schemas/config.json | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index 59a1d8f9ec..c699062426 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -116,8 +116,7 @@ "items": {"type": "string"} }, "use_nodes_option": {"type": "boolean"} - }, - "additionalProperties": false + } }, "stream_handler": { "allOf": [ @@ -275,11 +274,7 @@ "name": {"$ref": "#/defs/alphanum_ext_string"}, "descr": {"type": "string"}, "scheduler": { - "type": "string", - "enum": [ - "flux", "local", "lsf", "oar", "pbs", - "sge", "slurm", "squeue", "torque" - ] + "type": "string" }, "sched_options": {"$ref": "#/defs/sched_options"}, "launcher": { From aa8605acd9833ea4205b4401e8a4a9df89d2d3a7 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 12 Jul 2023 19:03:01 +0300 Subject: [PATCH 11/61] Add an SSH scheduler for running remote jobs The following are added: - Implementation of a future that wraps a spawned process - A new scheduler that can spawn reframe jobs on a remote machine accessed with SSH. 
--- reframe/core/backends.py | 3 +- reframe/core/schedulers/__init__.py | 11 ++ reframe/core/schedulers/local.py | 9 +- reframe/core/schedulers/ssh.py | 219 ++++++++++++++++++++++++++++ reframe/schemas/config.json | 7 +- reframe/utility/osext.py | 149 ++++++++++++++++++- unittests/test_utility.py | 176 ++++++++++++++++++++++ 7 files changed, 565 insertions(+), 9 deletions(-) create mode 100644 reframe/core/schedulers/ssh.py diff --git a/reframe/core/backends.py b/reframe/core/backends.py index a8720c2ce7..6a2924bc87 100644 --- a/reframe/core/backends.py +++ b/reframe/core/backends.py @@ -23,7 +23,8 @@ 'reframe.core.schedulers.pbs', 'reframe.core.schedulers.oar', 'reframe.core.schedulers.sge', - 'reframe.core.schedulers.slurm' + 'reframe.core.schedulers.slurm', + 'reframe.core.schedulers.ssh' ] _schedulers = {} diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index e21e33e56e..781bb031bb 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -627,3 +627,14 @@ def in_state(self, state): :returns: :class:`True` if the nodes's state matches the given one, :class:`False` otherwise. 
''' + +class AlwaysIdleNode(Node): + def __init__(self, name): + self._name = name + + @property + def name(self): + return self._name + + def in_state(self, state): + return state.casefold() == 'idle' diff --git a/reframe/core/schedulers/local.py b/reframe/core/schedulers/local.py index 5ace2c917a..a6d969ff7c 100644 --- a/reframe/core/schedulers/local.py +++ b/reframe/core/schedulers/local.py @@ -12,11 +12,7 @@ import reframe.core.schedulers as sched import reframe.utility.osext as osext from reframe.core.backends import register_scheduler -from reframe.core.exceptions import JobError, ReframeError - - -class _TimeoutExpired(ReframeError): - pass +from reframe.core.exceptions import JobError class _LocalJob(sched.Job): @@ -27,6 +23,7 @@ def __init__(self, *args, **kwargs): self._f_stderr = None self._signal = None self._cancel_time = None + self.spawn_command = f'./{self._script_filename}' @property def proc(self): @@ -66,7 +63,7 @@ def submit(self, job): # we can later kill any other processes that this might spawn by just # killing this one. proc = osext.run_command_async( - os.path.abspath(job.script_filename), + job.spawn_command, stdout=f_stdout, stderr=f_stderr, start_new_session=True diff --git a/reframe/core/schedulers/ssh.py b/reframe/core/schedulers/ssh.py new file mode 100644 index 0000000000..e99b3e467a --- /dev/null +++ b/reframe/core/schedulers/ssh.py @@ -0,0 +1,219 @@ +# Copyright 2016-2023 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# ReFrame Project Developers. See the top-level LICENSE file for details. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +import os +import functools +import time + +import reframe.utility.osext as osext +from reframe.core.backends import register_scheduler +from reframe.core.exceptions import ConfigError, SpawnedProcessError +from reframe.core.schedulers import Job, JobScheduler, AlwaysIdleNode + + +class _SSHJob(Job): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._localdir = None + self._remotedir = None + self._host = None + self._ssh_options = [] + + # Async processes spawned for this job + self.steps = {} + + @property + def localdir(self): + return self._localdir + + @property + def remotedir(self): + return self._remotedir + + @property + def host(self): + return self._host + + @property + def ssh_options(self): + return self._ssh_options + +@register_scheduler('ssh') +class SSHJobScheduler(JobScheduler): + def __init__(self): + self._free_hosts = set(self.get_option('hosts')) + self._allocated_hosts = set() + if not self._free_hosts: + raise ConfigError(f'no hosts specified for the SSH scheduler: {self._config_prefix}') + + # Determine if rsync is available + try: + osext.run_command('rsync --version', check=True) + except SpawnedProcessError: + self._has_rsync = False + else: + self._has_rsync = True + + def _reserve_host(self, host=None): + pool = self._free_hosts if self._free_hosts else self._allocated_hosts + if host: + pool.discard(host) + self._allocated_hosts.add(host) + return host + + host = pool.pop() + self._allocated_hosts.add(host) + return host + + def make_job(self, *args, **kwargs): + return _SSHJob(*args, **kwargs) + + def emit_preamble(self, job): + return [] + + def _push_artefacts(self, job): + assert isinstance(job, _SSHJob) + options = ' '.join(job.ssh_options) + + # Create a temporary directory on the remote host and push the job artifacts + completed = osext.run_command(f'ssh -o BatchMode=yes {options} {job.host} mktemp -td rfm.XXXXXXXX', check=True) + remotedir = 
completed.stdout.strip() + + # Store the local and remote dirs + job._localdir = os.getcwd() + job._remotedir = remotedir + + if self._has_rsync: + job.steps['push'] = osext.run_command_async2( + f'rsync -az -e "ssh -o BatchMode=yes {options}" {job.localdir}/ {job.host}:{remotedir}/', check=True + ) + else: + job.steps['push'] = osext.run_command_async2( + f'scp -r -o BatchMode=yes {options} {job.localdir}/* {job.host}:{remotedir}/', shell=True, check=True + ) + + + def _pull_artefacts(self, job): + assert isinstance(job, _SSHJob) + options = ' '.join(job.ssh_options) + if self._has_rsync: + job.steps['pull'] = osext.run_command_async2( + f'rsync -az -e "ssh -o BatchMode=yes {options}" {job.host}:{job.remotedir}/ {job.localdir}/' + ) + else: + job.steps['pull'] = osext.run_command_async2( + f"scp -r -o BatchMode=yes {options} '{job.host}:{job.remotedir}/*' {job.localdir}/", shell=True + ) + + def _do_submit(self, job): + # Modify the spawn command and submit + options = ' '.join(job.ssh_options) + job.steps['exec'] = osext.run_command_async2( + f'ssh -o BatchMode=yes {options} {job.host} "cd {job.remotedir} && bash -l {job.script_filename}"' + ) + + def submit(self, job): + assert isinstance(job, _SSHJob) + + # Check if `#host` pseudo-option is specified and use this as a host, + # stripping it off the rest of the options + host = None + stripped_opts = [] + options = job.sched_access + job.options + job.cli_options + for opt in options: + if opt.startswith('#host='): + _, host = opt.split('=', maxsplit=1) + else: + stripped_opts.append(opt) + + # Host is pinned externally (`--distribute` option) + if job.pin_nodes: + host = job.pin_nodes[0] + + job._submit_time = time.time() + job._ssh_options = stripped_opts + job._host = self._reserve_host(host) + + self._push_artefacts(job) + self._do_submit(job) + self._pull_artefacts(job) + + def success(proc): + return proc.exitcode == 0 + + job.steps['push'].then( + job.steps['exec'], + when=success + ).then( + 
job.steps['pull'], + when=success + ) + job.steps['push'].start() + job._jobid = job.steps['push'].pid + + def wait(self, job): + for step in job.steps.values(): + if step.started(): + step.wait() + + def cancel(self, job): + for step in job.steps.values(): + if step.started(): + step.cancel() + + def finished(self, job): + if job.exception: + raise job.exception + + return job.state is not None + + def poll(self, *jobs): + for job in jobs: + self._poll_job(job) + + def _poll_job(self, job): + last_done = None + last_failed = None + for proc_kind, proc in job.steps.items(): + if proc.started() and proc.done(): + last_done = proc_kind + if proc.exitcode != 0: + last_failed = proc_kind + break + + if last_failed is None and last_done != 'pull': + return False + + # Either all processes were done or one failed + # Update the job info + last_proc = job.steps[last_done] + job._exitcode = last_proc.exitcode + job._exception = last_proc.exception() + job._signal = last_proc.signal + if job._exitcode == 0: + job._state = 'SUCCESS' + else: + job._state = 'FAILURE' + + exec_proc = job.steps['exec'] + if exec_proc.started(): + with osext.change_dir(job.localdir): + with open(job.stdout, 'w+') as fout, open(job.stderr, 'w+') as ferr: + fout.write(exec_proc.stdout.read()) + ferr.write(exec_proc.stderr.read()) + + return True + + def allnodes(self): + return [AlwaysIdleNode(h) for h in self._free_hosts] + + def filternodes(self, job, nodes): + options = job.sched_access + job.options + job.cli_options + for opt in options: + if opt.startswith('#host='): + _, host = opt.split('=', maxsplit=1) + return [AlwaysIdleNode(host)] + else: + return [AlwaysIdleNode(h) for h in self._free_hosts] diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index 59a1d8f9ec..c1e6cfdfd9 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -109,6 +109,10 @@ "sched_options": { "type": "object", "properties": { + "hosts": { + "type": "array", + "items": 
{"type": "string"} + }, "ignore_reqnodenotavail": {"type": "boolean"}, "job_submit_timeout": {"type": "number"}, "resubmit_on_errors": { @@ -278,7 +282,7 @@ "type": "string", "enum": [ "flux", "local", "lsf", "oar", "pbs", - "sge", "slurm", "squeue", "torque" + "sge", "slurm", "squeue", "ssh", "torque" ] }, "sched_options": {"$ref": "#/defs/sched_options"}, @@ -620,6 +624,7 @@ "systems/partitions/time_limit": null, "systems/partitions/devices": [], "systems/partitions/extras": {}, + "systems/*/sched_options/hosts": [], "systems*/sched_options/ignore_reqnodenotavail": false, "systems*/sched_options/job_submit_timeout": 60, "systems*/sched_options/resubmit_on_errors": [], diff --git a/reframe/utility/osext.py b/reframe/utility/osext.py index efd22ee639..207e90183e 100644 --- a/reframe/utility/osext.py +++ b/reframe/utility/osext.py @@ -29,6 +29,151 @@ from . import OrderedSet +class UnstartedProcError(ReframeError): + '''Raised when a process operation is attempted on an unstarted process future''' + +class _ProcFuture: + def __init__(self, check=False, *args, **kwargs): + self._proc = None + self._exitcode = None + self._signal = None + self._check = check + self._cmd_args = (args, kwargs) + self._next = [] + self._done_callbacks = [] + self._completed = False + self._cancelled = False + + def _check_started(self): + if not self.started(): + raise UnstartedProcError + + def start(self): + args, kwargs = self._cmd_args + self._proc = run_command_async(*args, **kwargs) + + if os.getsid(self._proc.pid) == self._proc.pid: + self._session = True + else: + self._session = False + + @property + def pid(self): + return self._proc.pid + + @property + def exitcode(self): + return self._exitcode + + @property + def signal(self): + return self._signal + + def cancelled(self): + return self._cancelled + + def scheduled(self): + return self._scheduled + + def is_session(self): + return self._session + + def kill(self, signum): + self._check_started() + kill_fn = os.killpg if 
self.is_session() else os.kill + kill_fn(self.pid, signum) + self._signal = signum + + def terminate(self): + self.kill(signal.SIGTERM) + + def cancel(self): + self._check_started() + if not self.cancelled(): + self.kill(signal.SIGKILL) + + self._cancelled = True + + def add_done_callback(self, func): + self._done_callbacks.append(func) + + def then(self, future, when=None): + if when is None: + when = lambda fut: True + + self._next.append((future, when)) + return future + + def started(self): + return self._proc is not None + + def _wait(self, *, nohang=False): + self._check_started() + if self._completed: + return True + + options = os.WNOHANG if nohang else 0 + try: + pid, status = os.waitpid(self.pid, options) + except OSError as e: + if e.errno == errno.ECHILD: + self._completed = True + return self._completed + else: + raise e + + if nohang and not pid: + return False + + if os.WIFEXITED(status): + self._exitcode = os.WEXITSTATUS(status) + elif os.WIFSIGNALED(status): + self._signal = os.WTERMSIG(status) + + self._completed = True + + # Call any done callbacks + for func in self._done_callbacks: + func(self) + + # Start the next futures in the chain + for fut, cond in self._next: + if cond(self): + fut.start() + + return self._completed + + def done(self): + self._check_started() + return self._wait(nohang=True) + + def wait(self): + self._wait() + + def exception(self): + self._wait() + if not self._check: + return + + if self._proc.returncode == 0: + return + + return SpawnedProcessError(self._proc.args, + self._proc.stdout.read(), + self._proc.stderr.read(), + self._proc.returncode) + + @property + def stdout(self): + self._wait() + return self._proc.stdout + + @property + def stderr(self): + self._wait() + return self._proc.stderr + + def run_command(cmd, check=False, timeout=None, **kwargs): '''Run command synchronously. 
@@ -102,7 +247,7 @@ def run_command_async(cmd, if log: from reframe.core.logging import getlogger - getlogger().debug2(f'[CMD] {cmd!r}') + getlogger().debug(f'[CMD] {cmd!r}') if isinstance(cmd, str) and not shell: cmd = shlex.split(cmd) @@ -115,6 +260,8 @@ def run_command_async(cmd, shell=shell, **popen_args) +def run_command_async2(*args, check=False, **kwargs): + return _ProcFuture(check, *args, **kwargs) def osuser(): '''Return the name of the current OS user. diff --git a/unittests/test_utility.py b/unittests/test_utility.py index b89236cea8..b2e995920a 100644 --- a/unittests/test_utility.py +++ b/unittests/test_utility.py @@ -6,6 +6,7 @@ import os import pytest import random +import signal import sys import time @@ -83,6 +84,181 @@ def test_command_async(): assert t_launch < 1 assert t_sleep >= 1 +def test_command_futures(): + proc = osext.run_command_async2('echo hello', shell=True) + + # Check that some operations cannot be performed on an unstarted future + with pytest.raises(osext.UnstartedProcError): + proc.done() + + with pytest.raises(osext.UnstartedProcError): + proc.cancel() + + with pytest.raises(osext.UnstartedProcError): + proc.terminate() + + with pytest.raises(osext.UnstartedProcError): + proc.wait() + + assert not proc.started() + proc.start() + assert proc.started() + assert proc.pid is not None + + # By default a process is not started as a new session + assert not proc.is_session() + + # stdout must block + assert proc.stdout.read() == 'hello\n' + assert proc.exitcode == 0 + assert proc.signal is None + + # Additional wait() should have no effect + proc.wait() + proc.wait() + + assert proc.done() + assert not proc.cancelled() + assert proc.exception() is None + + +def test_command_futures_callbacks(): + num_called = 0 + def _callback(_): + nonlocal num_called + num_called += 1 + + proc = osext.run_command_async2("echo hello", shell=True) + proc.add_done_callback(_callback) + proc.start() + while not proc.done(): + pass + + # Call explicitly 
more times + proc.done() + proc.done() + assert num_called == 1 + + +@pytest.fixture(params=['checked', 'unchecked']) +def _checked_cmd(request): + return request.param == 'checked' + +def test_command_futures_error(_checked_cmd): + proc = osext.run_command_async2("false", shell=True, check=_checked_cmd) + proc.start() + + # exception() blocks until the process is finished + if _checked_cmd: + assert isinstance(proc.exception(), SpawnedProcessError) + else: + assert proc.exception() is None + + assert proc.exitcode == 1 + assert proc.signal is None + +@pytest.fixture(params=['SIGINT', 'SIGTERM', 'SIGKILL']) +def _signal(request): + if request.param == 'SIGINT': + return signal.SIGINT + elif request.param == 'SIGTERM': + return signal.SIGTERM + elif request.param == 'SIGKILL': + return signal.SIGKILL + + assert 0 + +def test_command_futures_signal(_checked_cmd, _signal): + proc = osext.run_command_async2('sleep 3', shell=True, check=_checked_cmd) + proc.start() + if _signal == signal.SIGTERM: + proc.terminate() + elif _signal == signal.SIGKILL: + proc.cancel() + else: + proc.kill(_signal) + + proc.wait() + assert proc.done() + if _signal == signal.SIGKILL: + assert proc.cancelled() + else: + assert not proc.cancelled() + + assert proc.signal == _signal + assert proc.exitcode is None + if _checked_cmd: + assert isinstance(proc.exception(), SpawnedProcessError) + else: + assert proc.exception() is None + +def test_command_futures_chain(tmp_path): + with open(tmp_path / 'stdout.txt', 'w+') as fp: + proc0 = osext.run_command_async2('echo hello', shell=True, stdout=fp) + proc1 = osext.run_command_async2('sleep 1', shell=True, stdout=fp) + proc2 = osext.run_command_async2('sleep 1', shell=True, stdout=fp) + proc3 = osext.run_command_async2('echo world', shell=True, stdout=fp) + proc0.then(proc1) + proc0.then(proc2).then(proc3) + + all_procs = [proc0, proc1, proc2, proc3] + t_start = time.time() + proc0.start() + while not all(p.done() for p in all_procs if p.started()): + 
pass + + t_elapsed = time.time() - t_start + assert t_elapsed < 2 + assert all(p.done() for p in all_procs) + + with open(tmp_path / 'stdout.txt') as fp: + assert fp.read() == 'hello\nworld\n' + +@pytest.fixture(params=['fail_on_error', 'ignore_errors']) +def _chain_policy(request): + return request.param + +def test_command_futures_chain_cond(_chain_policy, tmp_path): + if _chain_policy == 'fail_on_error': + def cond(proc): + return proc.exitcode == 0 + else: + def cond(proc): + return True + + with open(tmp_path / 'stdout.txt', 'w+') as fp: + proc0 = osext.run_command_async2("echo hello", shell=True, stdout=fp) + proc1 = osext.run_command_async2("false", shell=True) + proc2 = osext.run_command_async2("echo world", shell=True, stdout=fp) + proc0.then(proc1).then(proc2, when=cond) + proc0.start() + proc0.wait() + proc1.wait() + if _chain_policy == 'fail_on_error': + assert not proc2.started() + else: + proc2.wait() + + with open(tmp_path / 'stdout.txt') as fp: + if _chain_policy == 'fail_on_error': + assert fp.read() == 'hello\n' + else: + assert fp.read() == 'hello\nworld\n' + + +def test_command_futures_chain_cancel(): + proc0 = osext.run_command_async2('echo hello', shell=True) + proc1 = osext.run_command_async2('sleep 1', shell=True) + proc2 = osext.run_command_async2('echo world', shell=True) + proc0.then(proc1).then(proc2) + proc0.start() + while not proc0.done(): + pass + + assert proc1.started() + proc1.cancel() + assert proc1.cancelled() + assert not proc2.started() def test_copytree(tmp_path): dir_src = tmp_path / 'src' From aa91c1345291807d5b80ba5bf5533b4f36378b53 Mon Sep 17 00:00:00 2001 From: Victor Holanda Date: Fri, 21 Jul 2023 14:04:43 +0200 Subject: [PATCH 12/61] Add host_keys_check test This test checks the ssh host keys age. The list of host keys and the maximum age allowed are parameters and variables of the test. 
If the key file is not found, the test is skipped --- hpctestlib/system/ssh/host_keys.py | 66 ++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 hpctestlib/system/ssh/host_keys.py diff --git a/hpctestlib/system/ssh/host_keys.py b/hpctestlib/system/ssh/host_keys.py new file mode 100644 index 0000000000..eabb470b03 --- /dev/null +++ b/hpctestlib/system/ssh/host_keys.py @@ -0,0 +1,66 @@ +# Copyright 2016-2022 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# ReFrame Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: BSD-3-Clause + +import time + +from datetime import timedelta + +import reframe as rfm +import reframe.utility.sanity as sn +import reframe.utility.typecheck as typ + + +@rfm.simple_test +class host_keys_check(rfm.RunOnlyRegressionTest): + '''host keys age check + + The host keys should be renewed regularly. + In this case, we are checking against the + max_key_age variable + ''' + + #: Parameter list with all host keys to check + #: + #: The test skips if a key is not found + #: + #: :type: :class:`str` + #: :values: ``['/etc/ssh/ssh_host_rsa_key', + #: '/etc/ssh/ssh_host_ecdsa_key', + #: '/etc/ssh/ssh_host_ed25519_key']`` + host_keys = parameter([ + '/etc/ssh/ssh_host_rsa_key', + '/etc/ssh/ssh_host_ecdsa_key', + '/etc/ssh/ssh_host_ed25519_key', + ], fmt=lambda x: x.split('_')[2], loggable=True) + + #: The max age of the keys in ReFrame duration format + #: + #: :type: :class:`str` + #: :default: ``'365d'`` + max_key_age = variable(str, value='365d', loggable=True) + + executable = 'stat' + executable_opts = ['-c', '%Y'] + tags = {'system', 'ssh'} + + @run_after('init') + def set_hosts_keys(self): + self.executable_opts += [self.host_keys] + + @sanity_function + def assert_file_age(self): + current_time = time.time() + + skip_me = sn.extractall('No such file or directory', self.stderr) + self.skip_if(skip_me, msg=f'Skipping test because {self.host_keys}' + f' was not 
found') + + return sn.all([ + sn.assert_lt(current_time - + sn.extractsingle(r'\d+', self.stdout, 0, int), + typ.Duration(self.max_key_age), + msg=f'File {self.host_keys} is older than ' + f'{self.max_key_age}') + ]) From 0398eaa0ebd5ef9d84892c0ed9962e3e789f2ff5 Mon Sep 17 00:00:00 2001 From: Victor Holanda Date: Fri, 21 Jul 2023 14:08:00 +0200 Subject: [PATCH 13/61] Remove unused import --- hpctestlib/system/ssh/host_keys.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/hpctestlib/system/ssh/host_keys.py b/hpctestlib/system/ssh/host_keys.py index eabb470b03..46f2e9b812 100644 --- a/hpctestlib/system/ssh/host_keys.py +++ b/hpctestlib/system/ssh/host_keys.py @@ -5,8 +5,6 @@ import time -from datetime import timedelta - import reframe as rfm import reframe.utility.sanity as sn import reframe.utility.typecheck as typ From 7e674efb7d86d360791f69bc1e7c5abaf5c74407 Mon Sep 17 00:00:00 2001 From: Victor Holanda Date: Tue, 25 Jul 2023 14:22:53 +0200 Subject: [PATCH 14/61] Address PR remarks --- hpctestlib/system/ssh/host_keys.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/hpctestlib/system/ssh/host_keys.py b/hpctestlib/system/ssh/host_keys.py index 46f2e9b812..642e054731 100644 --- a/hpctestlib/system/ssh/host_keys.py +++ b/hpctestlib/system/ssh/host_keys.py @@ -1,4 +1,4 @@ -# Copyright 2016-2022 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# Copyright 2016-2023 Swiss National Supercomputing Centre (CSCS/ETH Zurich) # ReFrame Project Developers. See the top-level LICENSE file for details. # # SPDX-License-Identifier: BSD-3-Clause @@ -11,23 +11,23 @@ @rfm.simple_test -class host_keys_check(rfm.RunOnlyRegressionTest): - '''host keys age check +class ssh_host_keys_check(rfm.RunOnlyRegressionTest): + '''SSH host keys age check - The host keys should be renewed regularly. + The ssh host keys should be renewed regularly. 
In this case, we are checking against the max_key_age variable ''' #: Parameter list with all host keys to check #: - #: The test skips if a key is not found + #: The test is skipped if a key is not found #: #: :type: :class:`str` #: :values: ``['/etc/ssh/ssh_host_rsa_key', #: '/etc/ssh/ssh_host_ecdsa_key', #: '/etc/ssh/ssh_host_ed25519_key']`` - host_keys = parameter([ + ssh_host_keys = parameter([ '/etc/ssh/ssh_host_rsa_key', '/etc/ssh/ssh_host_ecdsa_key', '/etc/ssh/ssh_host_ed25519_key', @@ -55,10 +55,8 @@ def assert_file_age(self): self.skip_if(skip_me, msg=f'Skipping test because {self.host_keys}' f' was not found') - return sn.all([ - sn.assert_lt(current_time - + return sn.assert_lt(current_time - sn.extractsingle(r'\d+', self.stdout, 0, int), typ.Duration(self.max_key_age), msg=f'File {self.host_keys} is older than ' f'{self.max_key_age}') - ]) From 88fcf5d497b0d851ddfc291703505d9e048283a2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 1 Aug 2023 15:47:48 +0000 Subject: [PATCH 15/61] Bump pyyaml from 6.0 to 6.0.1 Bumps [pyyaml](https://github.com/yaml/pyyaml) from 6.0 to 6.0.1. - [Changelog](https://github.com/yaml/pyyaml/blob/6.0.1/CHANGES) - [Commits](https://github.com/yaml/pyyaml/compare/6.0...6.0.1) --- updated-dependencies: - dependency-name: pyyaml dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9469b49d56..f5e0b6f895 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ pytest-forked==1.6.0; python_version >= '3.7' pytest-parallel==0.1.1 pytest-rerunfailures==10.3; python_version == '3.6' pytest-rerunfailures==11.1.2; python_version >= '3.7' -PyYAML==6.0 +PyYAML==6.0.1 requests==2.27.1; python_version == '3.6' requests==2.31.0; python_version >= '3.7' semver==2.13.0; python_version == '3.6' From 6b38dabb05eed289ad316c7d059b5491ded189f0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 4 Aug 2023 07:59:35 +0000 Subject: [PATCH 16/61] Bump pytest-rerunfailures from 10.3 to 12.0 Bumps [pytest-rerunfailures](https://github.com/pytest-dev/pytest-rerunfailures) from 10.3 to 12.0. - [Changelog](https://github.com/pytest-dev/pytest-rerunfailures/blob/master/CHANGES.rst) - [Commits](https://github.com/pytest-dev/pytest-rerunfailures/compare/10.3...12.0) --- updated-dependencies: - dependency-name: pytest-rerunfailures dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f5e0b6f895..43846ab461 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ pytest-forked==1.4.0; python_version == '3.6' pytest-forked==1.6.0; python_version >= '3.7' pytest-parallel==0.1.1 pytest-rerunfailures==10.3; python_version == '3.6' -pytest-rerunfailures==11.1.2; python_version >= '3.7' +pytest-rerunfailures==12.0; python_version >= '3.7' PyYAML==6.0.1 requests==2.27.1; python_version == '3.6' requests==2.31.0; python_version >= '3.7' From 512d04ab794650afecd69d86feba2fce5af8e036 Mon Sep 17 00:00:00 2001 From: fenoyc Date: Fri, 4 Aug 2023 17:04:39 +0200 Subject: [PATCH 17/61] Ensure bjobs gets the right format On sites with non-default bjobs output format, ReFrame fails to detect the state of the job --- reframe/core/schedulers/lsf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reframe/core/schedulers/lsf.py b/reframe/core/schedulers/lsf.py index 999b88f18c..26b376c4df 100644 --- a/reframe/core/schedulers/lsf.py +++ b/reframe/core/schedulers/lsf.py @@ -94,7 +94,8 @@ def poll(self, *jobs): return completed = _run_strict( - f'bjobs -noheader {" ".join(job.jobid for job in jobs)}' + 'bjobs -o "jobid: user:10 stat: queue:" -noheader ' + f'{" ".join(job.jobid for job in jobs)}' ) job_status = {} job_status_lines = completed.stdout.split('\n') From 6246f8ad31bef1fb4484e2beb8c58e639f13b1a5 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 24 Aug 2023 00:33:15 +0200 Subject: [PATCH 18/61] Allow users to specify extra LSF options --- docs/config_reference.rst | 8 ++++++++ reframe/core/schedulers/lsf.py | 4 +++- reframe/schemas/config.json | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 4dbcab805a..4c5d48feb4 100644 --- a/docs/config_reference.rst +++ 
b/docs/config_reference.rst @@ -360,6 +360,14 @@ System Partition Configuration If timeout is reached, the test issuing that command will be marked as a failure. +.. py:attribute:: systems.partitions.sched_options.lsf_bjobs_extras + + :required: No + :default: ``[]`` + + List of extra options to be passed to the ``bjobs`` command of the LSF scheduler. + + .. py:attribute:: systems.partitions.sched_options.resubmit_on_errors :required: No diff --git a/reframe/core/schedulers/lsf.py b/reframe/core/schedulers/lsf.py index 26b376c4df..052f207503 100644 --- a/reframe/core/schedulers/lsf.py +++ b/reframe/core/schedulers/lsf.py @@ -93,8 +93,10 @@ def poll(self, *jobs): if not jobs: return + # Retrieve extra options + bjobs_opts = ' '.join(self.get_option('lsf_bjobs_extras')) completed = _run_strict( - 'bjobs -o "jobid: user:10 stat: queue:" -noheader ' + f'bjobs {bjobs_opts} -noheader ' f'{" ".join(job.jobid for job in jobs)}' ) job_status = {} diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index c699062426..9cce8906b8 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -111,6 +111,10 @@ "properties": { "ignore_reqnodenotavail": {"type": "boolean"}, "job_submit_timeout": {"type": "number"}, + "lsf_bjobs_extras": { + "type": "array", + "items": {"type": "string"} + }, "resubmit_on_errors": { "type": "array", "items": {"type": "string"} @@ -617,6 +621,7 @@ "systems/partitions/extras": {}, "systems*/sched_options/ignore_reqnodenotavail": false, "systems*/sched_options/job_submit_timeout": 60, + "systems*/sched_options/lsf_bjobs_extras": [], "systems*/sched_options/resubmit_on_errors": [], "systems*/sched_options/use_nodes_option": false } From 604a43d834db28b686e7faf9a1105070804d4a3d Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 25 Aug 2023 21:56:52 +0200 Subject: [PATCH 19/61] Revert "Allow users to specify extra LSF options" This reverts commit 6246f8ad31bef1fb4484e2beb8c58e639f13b1a5. 
--- docs/config_reference.rst | 8 -------- reframe/core/schedulers/lsf.py | 4 +--- reframe/schemas/config.json | 5 ----- 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 4c5d48feb4..4dbcab805a 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -360,14 +360,6 @@ System Partition Configuration If timeout is reached, the test issuing that command will be marked as a failure. -.. py:attribute:: systems.partitions.sched_options.lsf_bjobs_extras - - :required: No - :default: ``[]`` - - List of extra options to be passed to the ``bjobs`` command of the LSF scheduler. - - .. py:attribute:: systems.partitions.sched_options.resubmit_on_errors :required: No diff --git a/reframe/core/schedulers/lsf.py b/reframe/core/schedulers/lsf.py index 052f207503..26b376c4df 100644 --- a/reframe/core/schedulers/lsf.py +++ b/reframe/core/schedulers/lsf.py @@ -93,10 +93,8 @@ def poll(self, *jobs): if not jobs: return - # Retrieve extra options - bjobs_opts = ' '.join(self.get_option('lsf_bjobs_extras')) completed = _run_strict( - f'bjobs {bjobs_opts} -noheader ' + 'bjobs -o "jobid: user:10 stat: queue:" -noheader ' f'{" ".join(job.jobid for job in jobs)}' ) job_status = {} diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index 9cce8906b8..c699062426 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -111,10 +111,6 @@ "properties": { "ignore_reqnodenotavail": {"type": "boolean"}, "job_submit_timeout": {"type": "number"}, - "lsf_bjobs_extras": { - "type": "array", - "items": {"type": "string"} - }, "resubmit_on_errors": { "type": "array", "items": {"type": "string"} @@ -621,7 +617,6 @@ "systems/partitions/extras": {}, "systems*/sched_options/ignore_reqnodenotavail": false, "systems*/sched_options/job_submit_timeout": 60, - "systems*/sched_options/lsf_bjobs_extras": [], "systems*/sched_options/resubmit_on_errors": [], 
"systems*/sched_options/use_nodes_option": false } From de7a82a519e7b0dfc56f3e8f51cc52a36a5222c3 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 25 Aug 2023 23:03:39 +0200 Subject: [PATCH 20/61] Style fixes --- reframe/core/schedulers/ssh.py | 29 ++++++++++++++++++++--------- reframe/utility/osext.py | 8 ++++++-- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/reframe/core/schedulers/ssh.py b/reframe/core/schedulers/ssh.py index e99b3e467a..43fba82e9c 100644 --- a/reframe/core/schedulers/ssh.py +++ b/reframe/core/schedulers/ssh.py @@ -40,13 +40,15 @@ def host(self): def ssh_options(self): return self._ssh_options + @register_scheduler('ssh') class SSHJobScheduler(JobScheduler): def __init__(self): self._free_hosts = set(self.get_option('hosts')) self._allocated_hosts = set() if not self._free_hosts: - raise ConfigError(f'no hosts specified for the SSH scheduler: {self._config_prefix}') + raise ConfigError(f'no hosts specified for the SSH scheduler: ' + f'{self._config_prefix}') # Determine if rsync is available try: @@ -78,7 +80,10 @@ def _push_artefacts(self, job): options = ' '.join(job.ssh_options) # Create a temporary directory on the remote host and push the job artifacts - completed = osext.run_command(f'ssh -o BatchMode=yes {options} {job.host} mktemp -td rfm.XXXXXXXX', check=True) + completed = osext.run_command( + f'ssh -o BatchMode=yes {options} {job.host} ' + f'mktemp -td rfm.XXXXXXXX', check=True + ) remotedir = completed.stdout.strip() # Store the local and remote dirs @@ -87,31 +92,36 @@ def _push_artefacts(self, job): if self._has_rsync: job.steps['push'] = osext.run_command_async2( - f'rsync -az -e "ssh -o BatchMode=yes {options}" {job.localdir}/ {job.host}:{remotedir}/', check=True + f'rsync -az -e "ssh -o BatchMode=yes {options}" ' + f'{job.localdir}/ {job.host}:{remotedir}/', check=True ) else: job.steps['push'] = osext.run_command_async2( - f'scp -r -o BatchMode=yes {options} {job.localdir}/* 
{job.host}:{remotedir}/', shell=True, check=True + f'scp -r -o BatchMode=yes {options} ' + f'{job.localdir}/* {job.host}:{remotedir}/', + shell=True, check=True ) - def _pull_artefacts(self, job): assert isinstance(job, _SSHJob) options = ' '.join(job.ssh_options) if self._has_rsync: job.steps['pull'] = osext.run_command_async2( - f'rsync -az -e "ssh -o BatchMode=yes {options}" {job.host}:{job.remotedir}/ {job.localdir}/' + f'rsync -az -e "ssh -o BatchMode=yes {options}" ' + f'{job.host}:{job.remotedir}/ {job.localdir}/' ) else: job.steps['pull'] = osext.run_command_async2( - f"scp -r -o BatchMode=yes {options} '{job.host}:{job.remotedir}/*' {job.localdir}/", shell=True + f"scp -r -o BatchMode=yes {options} " + f"'{job.host}:{job.remotedir}/*' {job.localdir}/", shell=True ) def _do_submit(self, job): # Modify the spawn command and submit options = ' '.join(job.ssh_options) job.steps['exec'] = osext.run_command_async2( - f'ssh -o BatchMode=yes {options} {job.host} "cd {job.remotedir} && bash -l {job.script_filename}"' + f'ssh -o BatchMode=yes {options} {job.host} ' + f'"cd {job.remotedir} && bash -l {job.script_filename}"' ) def submit(self, job): @@ -200,7 +210,8 @@ def _poll_job(self, job): exec_proc = job.steps['exec'] if exec_proc.started(): with osext.change_dir(job.localdir): - with open(job.stdout, 'w+') as fout, open(job.stderr, 'w+') as ferr: + with (open(job.stdout, 'w+') as fout, + open(job.stderr, 'w+') as ferr): fout.write(exec_proc.stdout.read()) ferr.write(exec_proc.stderr.read()) diff --git a/reframe/utility/osext.py b/reframe/utility/osext.py index 207e90183e..a1f5c21a96 100644 --- a/reframe/utility/osext.py +++ b/reframe/utility/osext.py @@ -30,7 +30,9 @@ class UnstartedProcError(ReframeError): - '''Raised when a process operation is attempted on an unstarted process future''' + '''Raised when a process operation is attempted on a + not yet started process future''' + class _ProcFuture: def __init__(self, check=False, *args, **kwargs): @@ -99,7 
+101,7 @@ def add_done_callback(self, func): def then(self, future, when=None): if when is None: - when = lambda fut: True + def when(fut): return True self._next.append((future, when)) return future @@ -260,9 +262,11 @@ def run_command_async(cmd, shell=shell, **popen_args) + def run_command_async2(*args, check=False, **kwargs): return _ProcFuture(check, *args, **kwargs) + def osuser(): '''Return the name of the current OS user. From ea8fa42e5a5b2b4663ab6403f8e13471c3f435fd Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 28 Aug 2023 00:17:26 +0200 Subject: [PATCH 21/61] Add docs about SSH scheduler --- docs/config_reference.rst | 27 ++++++++- reframe/core/schedulers/ssh.py | 4 +- reframe/utility/osext.py | 101 +++++++++++++++++++++++++++++++-- unittests/test_utility.py | 18 +++++- 4 files changed, 139 insertions(+), 11 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 4dbcab805a..2808244acc 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -260,8 +260,9 @@ System Partition Configuration The job scheduler that will be used to launch jobs on this partition. Supported schedulers are the following: - - ``local``: Jobs will be launched locally without using any job scheduler. - ``flux``: Jobs will be launched using the `Flux Framework `_ scheduler. + - ``local``: Jobs will be launched locally without using any job scheduler. + - ``lsf``: Jobs will be launched using the `LSF `__ scheduler. - ``oar``: Jobs will be launched using the `OAR `__ scheduler. - ``pbs``: Jobs will be launched using the `PBS Pro `__ scheduler. - ``sge``: Jobs will be launched using the `Sun Grid Engine `__ scheduler. @@ -270,8 +271,19 @@ System Partition Configuration If not, you should consider using the ``squeue`` backend below. - ``squeue``: Jobs will be launched using the `Slurm `__ scheduler. 
This backend does not rely on job accounting to retrieve job statuses, but ReFrame does its best to query the job state as reliably as possible. + - ``ssh``: Jobs will be launched on a remote host using SSH. + + The remote host will be selected from the list of hosts specified in :attr:`~systems.partitions.sched_options.hosts`. + The scheduler keeps track of the hosts that it has submitted jobs to, and it will select the next available one in a round-robin fashion. + For connecting to a remote host, the options specified in :attr:`~systems.partitions.access` will be used. + + When a job is submitted with this scheduler, its stage directory will be copied over to a unique temporary directory on the remote host, then the job will be executed and, finally, any produced artifacts will be copied back. + + The contents of the stage directory are copied to the remote host either using ``rsync``, if available, or ``scp`` as a second choice. + The same :attr:`~systems.partitions.access` options will be used in those operations as well. + Please note, that the connection options of ``ssh`` and ``scp`` differ and ReFrame will not attempt to translate any options between the two utilities in case ``scp`` is selected for copying to the remote host. + In this case, it is preferable to set up the host connection options in ``~/.ssh/config`` and leave :attr:`~systems.partition.access` blank. - ``torque``: Jobs will be launched using the `Torque `__ scheduler. - - ``lsf``: Jobs will be launched using the `LSF `__ scheduler. .. versionadded:: 3.7.2 Support for the SGE scheduler is added. @@ -282,6 +294,9 @@ System Partition Configuration .. versionadded:: 3.11.0 Support for the LSF scheduler is added. + .. versionadded:: 4.4 + The ``ssh`` scheduler is added. + .. note:: The way that multiple node jobs are submitted using the SGE scheduler can be very site-specific. @@ -337,6 +352,14 @@ System Partition Configuration .. warning:: This option is broken in 4.0. +.. 
py:attribute:: systems.partitions.sched_options.hosts + + :required: No + :default: ``[]`` + + List of hosts in a partition that uses the ``ssh`` scheduler. + + .. py:attribute:: systems.partitions.sched_options.ignore_reqnodenotavail :required: No diff --git a/reframe/core/schedulers/ssh.py b/reframe/core/schedulers/ssh.py index 43fba82e9c..dbfb678f24 100644 --- a/reframe/core/schedulers/ssh.py +++ b/reframe/core/schedulers/ssh.py @@ -212,8 +212,8 @@ def _poll_job(self, job): with osext.change_dir(job.localdir): with (open(job.stdout, 'w+') as fout, open(job.stderr, 'w+') as ferr): - fout.write(exec_proc.stdout.read()) - ferr.write(exec_proc.stderr.read()) + fout.write(exec_proc.stdout().read()) + ferr.write(exec_proc.stderr().read()) return True diff --git a/reframe/utility/osext.py b/reframe/utility/osext.py index a1f5c21a96..ccc2d90365 100644 --- a/reframe/utility/osext.py +++ b/reframe/utility/osext.py @@ -35,6 +35,17 @@ class UnstartedProcError(ReframeError): class _ProcFuture: + '''A future encapsulating a command to be executed asynchronously. + + Users may not create a :class:`_ProcFuture` directly, but should use + :func:`run_command_async2` instead. + + :meta public: + + .. versionadded:: 4.4 + + ''' + def __init__(self, check=False, *args, **kwargs): self._proc = None self._exitcode = None @@ -51,6 +62,8 @@ def _check_started(self): raise UnstartedProcError def start(self): + '''Start the future, i.e. 
spawn the encapsulated command.''' + args, kwargs = self._cmd_args self._proc = run_command_async(*args, **kwargs) @@ -59,37 +72,50 @@ def start(self): else: self._session = False + return self + @property def pid(self): + '''The PID of the spawned process.''' return self._proc.pid @property def exitcode(self): + '''The exit code of the spawned process.''' return self._exitcode @property def signal(self): + '''The signal number that caused the spawned process to exit.''' return self._signal def cancelled(self): + '''Returns :obj:`True` if the future was cancelled.''' return self._cancelled - def scheduled(self): - return self._scheduled - def is_session(self): + '''Returns :obj:`True` is the spawned process is a group or session + leader.''' return self._session def kill(self, signum): + '''Send signal ``signum`` to the spawned process. + + If the process is a group or session leader, the signal will be sent + to the whole group or session. + ''' + self._check_started() kill_fn = os.killpg if self.is_session() else os.kill kill_fn(self.pid, signum) self._signal = signum def terminate(self): + '''Terminate the spawned process by sending ``SIGTERM``.''' self.kill(signal.SIGTERM) def cancel(self): + '''Cancel the spawned process by sending ``SIGKILL``.''' self._check_started() if not self.cancelled(): self.kill(signal.SIGKILL) @@ -97,16 +123,45 @@ def cancel(self): self._cancelled = True def add_done_callback(self, func): + '''Add a callback that will be called when this future is done. + + The callback function will be called with the future as its sole + argument. + ''' + if not util.is_trivially_callable(func, non_def_args=1): + raise ValueError('the callback function must ' + 'accept a single argument') + self._done_callbacks.append(func) def then(self, future, when=None): + '''Schedule another future for execution after this one. + + :arg future: a :class:`_ProcFuture` to be started after this one + finishes. 
+ :arg when: A callable that will be used as conditional for starting or + not the next future. It will be called with this future as its + sole argument and must return a boolean. If the return value is + true, then ``future`` will start execution, otherwise not. + + If ``when`` is :obj:`None`, then the next future will be executed + unconditionally. + :returns: the passed ``future``, so that multiple :func:`then` calls + can be chained. + ''' + if when is None: def when(fut): return True + if not util.is_trivially_callable(when, non_def_args=1): + raise ValueError("the 'when' function must " + "accept a single argument") + self._next.append((future, when)) return future def started(self): + '''Check if this future has started.''' return self._proc is not None def _wait(self, *, nohang=False): @@ -146,13 +201,27 @@ def _wait(self, *, nohang=False): return self._completed def done(self): + '''Check if the future has finished. + + This is a non-blocking call. + ''' self._check_started() return self._wait(nohang=True) def wait(self): + '''Wait for this future to finish.''' self._wait() def exception(self): + '''Retrieve the exception raised by this future. + + This is a blocking call and will wait until this future finishes. + + The only exception that a :func:`_ProcFuture` can return is a + :class:`SpawnedProcessError` if the ``check`` flag was set in + :func:`run_command_async2`. + ''' + self._wait() if not self._check: return @@ -165,13 +234,19 @@ def exception(self): self._proc.stderr.read(), self._proc.returncode) - @property def stdout(self): + '''Retrieve the standard output of the spawned process. + + This is a blocking call and will wait until the future finishes. + ''' self._wait() return self._proc.stdout - @property def stderr(self): + '''Retrieve the standard error of the spawned process. + + This is a blocking call and will wait until the future finishes. 
+ ''' self._wait() return self._proc.stderr @@ -264,6 +339,22 @@ def run_command_async(cmd, def run_command_async2(*args, check=False, **kwargs): + '''Return a :class:`_ProcFuture` that encapsulates a command to be + executed. + + The command to be executed will not start until the returned future is + started. + + :arg args: Any of the arguments that can be passed to + :func:`run_command_async` + :arg check: If true, flag the future with a :func:`SpawnedProcessError` + exception, upon failure. + :arg kwargs: Any of the keyword arguments that can be passed to + :func:`run_command_async`. + + .. versionadded:: 4.4 + + ''' return _ProcFuture(check, *args, **kwargs) diff --git a/unittests/test_utility.py b/unittests/test_utility.py index b2e995920a..c3d33c6101 100644 --- a/unittests/test_utility.py +++ b/unittests/test_utility.py @@ -84,6 +84,7 @@ def test_command_async(): assert t_launch < 1 assert t_sleep >= 1 + def test_command_futures(): proc = osext.run_command_async2('echo hello', shell=True) @@ -109,7 +110,7 @@ def test_command_futures(): assert not proc.is_session() # stdout must block - assert proc.stdout.read() == 'hello\n' + assert proc.stdout().read() == 'hello\n' assert proc.exitcode == 0 assert proc.signal is None @@ -124,12 +125,16 @@ def test_command_futures(): def test_command_futures_callbacks(): num_called = 0 + def _callback(_): nonlocal num_called num_called += 1 proc = osext.run_command_async2("echo hello", shell=True) proc.add_done_callback(_callback) + with pytest.raises(ValueError): + proc.add_done_callback(lambda: 1) + proc.start() while not proc.done(): pass @@ -144,6 +149,7 @@ def _callback(_): def _checked_cmd(request): return request.param == 'checked' + def test_command_futures_error(_checked_cmd): proc = osext.run_command_async2("false", shell=True, check=_checked_cmd) proc.start() @@ -157,6 +163,7 @@ def test_command_futures_error(_checked_cmd): assert proc.exitcode == 1 assert proc.signal is None + @pytest.fixture(params=['SIGINT', 
'SIGTERM', 'SIGKILL']) def _signal(request): if request.param == 'SIGINT': @@ -168,6 +175,7 @@ def _signal(request): assert 0 + def test_command_futures_signal(_checked_cmd, _signal): proc = osext.run_command_async2('sleep 3', shell=True, check=_checked_cmd) proc.start() @@ -192,6 +200,7 @@ def test_command_futures_signal(_checked_cmd, _signal): else: assert proc.exception() is None + def test_command_futures_chain(tmp_path): with open(tmp_path / 'stdout.txt', 'w+') as fp: proc0 = osext.run_command_async2('echo hello', shell=True, stdout=fp) @@ -200,7 +209,6 @@ def test_command_futures_chain(tmp_path): proc3 = osext.run_command_async2('echo world', shell=True, stdout=fp) proc0.then(proc1) proc0.then(proc2).then(proc3) - all_procs = [proc0, proc1, proc2, proc3] t_start = time.time() proc0.start() @@ -214,10 +222,12 @@ def test_command_futures_chain(tmp_path): with open(tmp_path / 'stdout.txt') as fp: assert fp.read() == 'hello\nworld\n' + @pytest.fixture(params=['fail_on_error', 'ignore_errors']) def _chain_policy(request): return request.param + def test_command_futures_chain_cond(_chain_policy, tmp_path): if _chain_policy == 'fail_on_error': def cond(proc): @@ -231,6 +241,9 @@ def cond(proc): proc1 = osext.run_command_async2("false", shell=True) proc2 = osext.run_command_async2("echo world", shell=True, stdout=fp) proc0.then(proc1).then(proc2, when=cond) + with pytest.raises(ValueError): + proc0.then(proc1, when=lambda: False) + proc0.start() proc0.wait() proc1.wait() @@ -260,6 +273,7 @@ def test_command_futures_chain_cancel(): assert proc1.cancelled() assert not proc2.started() + def test_copytree(tmp_path): dir_src = tmp_path / 'src' dir_src.mkdir() From b3a7a26f49f223f8ceca5aabec61357d3c9c7f8d Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 28 Aug 2023 22:54:21 +0200 Subject: [PATCH 22/61] Fix unit tests --- reframe/core/schedulers/local.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/reframe/core/schedulers/local.py 
b/reframe/core/schedulers/local.py index a6d969ff7c..09d671f43e 100644 --- a/reframe/core/schedulers/local.py +++ b/reframe/core/schedulers/local.py @@ -23,7 +23,6 @@ def __init__(self, *args, **kwargs): self._f_stderr = None self._signal = None self._cancel_time = None - self.spawn_command = f'./{self._script_filename}' @property def proc(self): @@ -63,7 +62,7 @@ def submit(self, job): # we can later kill any other processes that this might spawn by just # killing this one. proc = osext.run_command_async( - job.spawn_command, + os.path.abspath(job.script_filename), stdout=f_stdout, stderr=f_stderr, start_new_session=True From 47b15fb58f8963a8c332d07fa89e9986d9dda128 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Tue, 29 Aug 2023 00:03:08 +0200 Subject: [PATCH 23/61] Remove parenthesized with stmt --- reframe/core/schedulers/ssh.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/reframe/core/schedulers/ssh.py b/reframe/core/schedulers/ssh.py index dbfb678f24..41be9cfd04 100644 --- a/reframe/core/schedulers/ssh.py +++ b/reframe/core/schedulers/ssh.py @@ -210,9 +210,10 @@ def _poll_job(self, job): exec_proc = job.steps['exec'] if exec_proc.started(): with osext.change_dir(job.localdir): - with (open(job.stdout, 'w+') as fout, - open(job.stderr, 'w+') as ferr): + with open(job.stdout, 'w+') as fout: fout.write(exec_proc.stdout().read()) + + with open(job.stderr, 'w+') as ferr: ferr.write(exec_proc.stderr().read()) return True From d150b87b885510dd246869de5588d6692166834a Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Tue, 29 Aug 2023 22:46:10 +0200 Subject: [PATCH 24/61] Accept `y` and `n` for boolean conversions --- reframe/utility/typecheck.py | 15 +++++++++++---- unittests/test_typecheck.py | 12 ++++++------ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/reframe/utility/typecheck.py b/reframe/utility/typecheck.py index 9f27234e88..45c2894e6f 100644 --- a/reframe/utility/typecheck.py +++ 
b/reframe/utility/typecheck.py @@ -374,8 +374,9 @@ class Bool(metaclass=_BuiltinType): This type represents a boolean value but allows implicit conversions from :class:`str`. More specifically, the following conversions are supported: - - The strings ``'yes'``, ``'true'`` and ``'1'`` are converted to ``True``. - - The strings ``'no'``, ``'false'`` and ``'0'`` are converted to + - The strings ``'yes'``, ``'y'``, 'true'`` and ``'1'`` are converted to + ``True``. + - The strings ``'no'``, ``'n'``, ``'false'`` and ``'0'`` are converted to ``False``. The built-in :class:`bool` type is registered as a subclass of this type. @@ -383,15 +384,21 @@ class Bool(metaclass=_BuiltinType): Boolean test variables that are meant to be set properly from the command line must be declared of this type and not :class:`bool`. + .. versionchanged:: 4.3.3 + + The strings ``'y'`` and ``'n'`` are also recognized as valid boolean + values and string comparison is now case-insensitive. + ''' _type = bool @classmethod def __rfm_cast_str__(cls, s): - if s in ('true', 'yes', '1'): + s = s.lower() + if s in ('true', 'yes', 'y', '1'): return True - elif s in ('false', 'no', '0'): + elif s in ('false', 'no', 'n', '0'): return False raise TypeError(f'cannot convert {s!r} to bool') diff --git a/unittests/test_typecheck.py b/unittests/test_typecheck.py index f640a9d7df..4873d6dc71 100644 --- a/unittests/test_typecheck.py +++ b/unittests/test_typecheck.py @@ -27,17 +27,17 @@ def test_bool_type(): with pytest.raises(TypeError): typ.Bool('foo') - with pytest.raises(TypeError): - typ.Bool('True') - - with pytest.raises(TypeError): - typ.Bool('False') - # Test for boolean conversion assert typ.Bool('true') is True + assert typ.Bool('True') is True assert typ.Bool('yes') is True + assert typ.Bool('y') is True + assert typ.Bool('YeS') is True assert typ.Bool('false') is False + assert typ.Bool('False') is False assert typ.Bool('no') is False + assert typ.Bool('n') is False + assert typ.Bool('nO') is False 
def test_duration_type(): From ef47bf2192c3df981fb4fab077f7a31bedbbff9f Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Tue, 29 Aug 2023 23:09:03 +0200 Subject: [PATCH 25/61] Document default value of `format_vars` --- docs/config_reference.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 4dbcab805a..cbb9373032 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -1095,6 +1095,9 @@ All logging handlers share the following set of common attributes: .. py:attribute:: logging.handlers.format_perfvars .. py:attribute:: logging.handlers_perflog.format_perfvars + :required: No + :default: ``""`` + Format specifier for logging the performance variables. This defines how the ``%(check_perfvalues)s`` will be formatted. From b9533056bd884b37e36d24903d65e645696523de Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 31 Aug 2023 22:30:35 +0200 Subject: [PATCH 26/61] Remove parallel launcher from remote detection --- reframe/frontend/autodetect.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/reframe/frontend/autodetect.py b/reframe/frontend/autodetect.py index 0ad927a765..e1e523f4c7 100644 --- a/reframe/frontend/autodetect.py +++ b/reframe/frontend/autodetect.py @@ -125,10 +125,9 @@ def _is_part_local(part): def _remote_detect(part): def _emit_script(job, env): - launcher_cmd = job.launcher.run_command(job) commands = [ f'./bootstrap.sh', - f'{launcher_cmd} ./bin/reframe --detect-host-topology=topo.json' + f'./bin/reframe --detect-host-topology=topo.json' ] job.prepare(commands, env, trap_errors=True) From 84f95c7c48d10e89f0a135ead777b45518d17a41 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 31 Aug 2023 22:45:05 +0200 Subject: [PATCH 27/61] Fix remote detection for pip installations --- reframe/frontend/autodetect.py | 46 +++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git 
a/reframe/frontend/autodetect.py b/reframe/frontend/autodetect.py index e1e523f4c7..1adf4c401f 100644 --- a/reframe/frontend/autodetect.py +++ b/reframe/frontend/autodetect.py @@ -48,15 +48,19 @@ def __enter__(self): tempfile.mkdtemp(prefix='rfm.', dir=self._prefix) ) paths = ['bin/', 'reframe/', 'bootstrap.sh', 'requirements.txt'] - for p in paths: - src = os.path.join(rfm.INSTALL_PREFIX, p) - if os.path.isdir(src): - dst = os.path.join(self._workdir, p) - osext.copytree(src, dst, dirs_exist_ok=True) - else: - shutil.copy2(src, self._workdir) - - return self._workdir + use_pip = False + try: + for p in paths: + src = os.path.join(rfm.INSTALL_PREFIX, p) + if os.path.isdir(src): + dst = os.path.join(self._workdir, p) + osext.copytree(src, dst, dirs_exist_ok=True) + else: + shutil.copy2(src, self._workdir) + except OSError: + use_pip = True + + return self._workdir, use_pip def __exit__(self, exc_type, exc_val, exc_tb): osext.rmtree(self._workdir) @@ -124,10 +128,20 @@ def _is_part_local(part): def _remote_detect(part): - def _emit_script(job, env): + def _emit_script_for_source(job, env): + commands = [ + './bootstrap.sh', + './bin/reframe --detect-host-topology=topo.json' + ] + job.prepare(commands, env, trap_errors=True) + + def _emit_script_for_pip(job, env): commands = [ - f'./bootstrap.sh', - f'./bin/reframe --detect-host-topology=topo.json' + 'python3 -m venv venv.reframe', + 'source venv.reframe/bin/activate', + 'pip install reframe-hpc', + 'reframe --detect-host-topology=topo.json', + 'deactivate' ] job.prepare(commands, env, trap_errors=True) @@ -138,13 +152,17 @@ def _emit_script(job, env): topo_info = {} try: prefix = runtime.runtime().get_option('general/0/remote_workdir') - with _copy_reframe(prefix) as dirname: + with _copy_reframe(prefix) as (dirname, use_pip): with osext.change_dir(dirname): job = Job.create(part.scheduler, part.launcher_type(), name='rfm-detect-job', sched_access=part.access) - _emit_script(job, [part.local_env]) + if use_pip: + 
_emit_script_for_pip(job, [part.local_env]) + else: + _emit_script_for_source(job, [part.local_env]) + getlogger().debug('submitting detection script') _log_contents(job.script_filename) job.submit() From 7f58c22b8c6f5d93b561cd27bc9778b2658e3d13 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Sep 2023 15:12:08 +0000 Subject: [PATCH 28/61] Bump setuptools from 59.6.0 to 68.1.2 Bumps [setuptools](https://github.com/pypa/setuptools) from 59.6.0 to 68.1.2. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v59.6.0...v68.1.2) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 43846ab461..4d1fa08ec4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,6 +15,6 @@ requests==2.31.0; python_version >= '3.7' semver==2.13.0; python_version == '3.6' semver==3.0.1; python_version >= '3.7' setuptools==59.6.0; python_version == '3.6' -setuptools==68.0.0; python_version >= '3.7' +setuptools==68.1.2; python_version >= '3.7' wcwidth==0.2.6 #+pygelf%pygelf==0.4.0 From c8983c5f8ce76c46716781bc9170e91343b8d393 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sat, 2 Sep 2023 00:24:39 +0200 Subject: [PATCH 29/61] Fine tune implementation --- reframe/frontend/autodetect.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reframe/frontend/autodetect.py b/reframe/frontend/autodetect.py index 1adf4c401f..d4c75674f9 100644 --- a/reframe/frontend/autodetect.py +++ b/reframe/frontend/autodetect.py @@ -57,7 +57,7 @@ def __enter__(self): osext.copytree(src, dst, dirs_exist_ok=True) else: shutil.copy2(src, self._workdir) - 
except OSError: + except FileNotFoundError: use_pip = True return self._workdir, use_pip @@ -139,7 +139,7 @@ def _emit_script_for_pip(job, env): commands = [ 'python3 -m venv venv.reframe', 'source venv.reframe/bin/activate', - 'pip install reframe-hpc', + f'pip install reframe-hpc=={rfm.VERSION}', 'reframe --detect-host-topology=topo.json', 'deactivate' ] From e0f757bc853bc4ce5cc1dcafeb0ed05b81361cda Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sun, 3 Sep 2023 21:11:28 +0200 Subject: [PATCH 30/61] Pin setuptools for Python 3.7 --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4d1fa08ec4..710f8a28c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,6 +15,7 @@ requests==2.31.0; python_version >= '3.7' semver==2.13.0; python_version == '3.6' semver==3.0.1; python_version >= '3.7' setuptools==59.6.0; python_version == '3.6' -setuptools==68.1.2; python_version >= '3.7' +setuptools==68.0.0; python_version == '3.7' +setuptools==68.1.2; python_version >= '3.8' wcwidth==0.2.6 #+pygelf%pygelf==0.4.0 From 2fda5760d008fddae8985d6ac4f634b9b4a620d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 3 Sep 2023 19:26:21 +0000 Subject: [PATCH 31/61] Bump lxml from 4.9.2 to 4.9.3 Bumps [lxml](https://github.com/lxml/lxml) from 4.9.2 to 4.9.3. - [Release notes](https://github.com/lxml/lxml/releases) - [Changelog](https://github.com/lxml/lxml/blob/master/CHANGES.txt) - [Commits](https://github.com/lxml/lxml/compare/lxml-4.9.2...lxml-4.9.3) --- updated-dependencies: - dependency-name: lxml dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 710f8a28c2..8e2bffa6b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ archspec==0.2.1 argcomplete==3.1.1 importlib_metadata==4.0.1; python_version < '3.8' jsonschema==3.2.0 -lxml==4.9.2 +lxml==4.9.3 pytest==7.0.1 pytest-forked==1.4.0; python_version == '3.6' pytest-forked==1.6.0; python_version >= '3.7' From 225d6dd3b8cc9a38be46634e7778110948a1e9f8 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 6 Sep 2023 21:37:25 +0200 Subject: [PATCH 32/61] Allow pinning a hook as the last of its stage --- docs/regression_test_api.rst | 4 +-- reframe/core/builtins.py | 21 ++++++++++++---- reframe/core/hooks.py | 9 ++++--- reframe/core/pipeline.py | 30 +++++++++++++++++++---- unittests/test_meta.py | 8 +++--- unittests/test_pipeline.py | 47 ++++++++++++++++++++++++++++++++++++ 6 files changed, 99 insertions(+), 20 deletions(-) diff --git a/docs/regression_test_api.rst b/docs/regression_test_api.rst index a42dcd621b..dc5695eb0d 100644 --- a/docs/regression_test_api.rst +++ b/docs/regression_test_api.rst @@ -67,9 +67,9 @@ The use of this module is required only when creating new tests programmatically .. autodecorator:: reframe.core.builtins.require_deps -.. autodecorator:: reframe.core.builtins.run_after(stage) +.. autodecorator:: reframe.core.builtins.run_after(stage, *, always_last=False) -.. autodecorator:: reframe.core.builtins.run_before(stage) +.. autodecorator:: reframe.core.builtins.run_before(stage, *, always_last=False) .. 
autodecorator:: reframe.core.builtins.sanity_function diff --git a/reframe/core/builtins.py b/reframe/core/builtins.py index 8d4eb5992a..6f32c3a7f4 100644 --- a/reframe/core/builtins.py +++ b/reframe/core/builtins.py @@ -37,7 +37,7 @@ def final(fn): # Hook-related builtins -def run_before(stage): +def run_before(stage, *, always_last=False): '''Attach the decorated function before a certain pipeline stage. The function will run just before the specified pipeline stage and it @@ -47,14 +47,25 @@ def run_before(stage): :param stage: The pipeline stage where this function will be attached to. See :ref:`pipeline-hooks` for the list of valid stage values. + + :param always_last: Run this hook always as the last one of the stage. In + a whole test hierarchy, only a single hook can be explicitly pinned at + the end of the same-stage sequence of hooks. If another hook is + declared as ``always_last`` in the same stage, an error will be + issued. + + .. versionchanged:: 4.4 + The ``always_last`` argument was added. + ''' - return hooks.attach_to('pre_' + stage) + + return hooks.attach_to('pre_' + stage, always_last) -def run_after(stage): +def run_after(stage, *, always_last=False): '''Attach the decorated function after a certain pipeline stage. - This is analogous to :func:`~RegressionMixin.run_before`, except that the + This is analogous to :func:`run_before`, except that the hook will execute right after the stage it was attached to. This decorator also supports ``'init'`` as a valid ``stage`` argument, where in this case, the hook will execute right after the test is initialized (i.e. @@ -81,7 +92,7 @@ def __init__(self): Add support for post-init hooks. 
''' - return hooks.attach_to('post_' + stage) + return hooks.attach_to('post_' + stage, always_last) require_deps = hooks.require_deps diff --git a/reframe/core/hooks.py b/reframe/core/hooks.py index 4fde89322e..74fab55a5b 100644 --- a/reframe/core/hooks.py +++ b/reframe/core/hooks.py @@ -9,16 +9,16 @@ import reframe.utility as util -def attach_to(phase): +def attach_to(phase, always_last): '''Backend function to attach a hook to a given phase. :meta private: ''' def deco(func): if hasattr(func, '_rfm_attach'): - func._rfm_attach.append(phase) + func._rfm_attach.append((phase, always_last)) else: - func._rfm_attach = [phase] + func._rfm_attach = [(phase, always_last)] try: # no need to resolve dependencies independently; this function is @@ -124,6 +124,7 @@ def __init__(self, fn): @property def stages(self): return self._rfm_attach + # return [stage for stage, _ in self._rfm_attach] def __getattr__(self, attr): return getattr(self.__fn, attr) @@ -179,7 +180,7 @@ def add(self, v): self.__hooks.discard(h) self.__hooks.add(h) elif hasattr(v, '_rfm_resolve_deps'): - v._rfm_attach = ['post_setup'] + v._rfm_attach = [('post_setup', None)] self.__hooks.add(Hook(v)) def update(self, other, *, denied_hooks=None): diff --git a/reframe/core/pipeline.py b/reframe/core/pipeline.py index ee72e4a041..ac08e2fc78 100644 --- a/reframe/core/pipeline.py +++ b/reframe/core/pipeline.py @@ -179,12 +179,32 @@ def disable_hook(self, hook_name): @classmethod def pipeline_hooks(cls): ret = {} + last = {} for hook in cls._rfm_hook_registry: - for stage in hook.stages: - try: - ret[stage].append(hook.fn) - except KeyError: - ret[stage] = [hook.fn] + for stage, always_last in hook.stages: + if always_last: + if stage in last: + hook_name = hook.__qualname__ + pinned_name = last[stage].__qualname__ + raise ReframeSyntaxError( + f'cannot pin hook {hook_name!r} as last ' + f'of stage {stage!r} as {pinned_name!r} ' + f'is already pinned last' + ) + + last[stage] = hook + else: + try: + 
ret[stage].append(hook.fn) + except KeyError: + ret[stage] = [hook.fn] + + # Append the last hooks + for stage, hook in last.items(): + try: + ret[stage].append(hook.fn) + except KeyError: + ret[stage] = [hook.fn] return ret diff --git a/unittests/test_meta.py b/unittests/test_meta.py index 53c7095a51..060df0bfdf 100644 --- a/unittests/test_meta.py +++ b/unittests/test_meta.py @@ -189,7 +189,7 @@ class Foo(MyMeta): def hook_a(self): pass - @run_before('compile') + @run_before('compile', always_last=True) def hook_b(self): pass @@ -198,11 +198,11 @@ def hook_c(self): pass @classmethod - def hook_in_stage(cls, hook, stage): + def hook_in_stage(cls, hook, stage, always_last=False): '''Assert that a hook is in a given registry stage.''' for h in cls._rfm_hook_registry: if h.__name__ == hook: - if stage in h.stages: + if (stage, always_last) in h.stages: return True break @@ -210,7 +210,7 @@ def hook_in_stage(cls, hook, stage): return False assert Foo.hook_in_stage('hook_a', 'post_setup') - assert Foo.hook_in_stage('hook_b', 'pre_compile') + assert Foo.hook_in_stage('hook_b', 'pre_compile', True) assert Foo.hook_in_stage('hook_c', 'post_run') class Bar(Foo): diff --git a/unittests/test_pipeline.py b/unittests/test_pipeline.py index 0bb95574b9..cdaae8c8a9 100644 --- a/unittests/test_pipeline.py +++ b/unittests/test_pipeline.py @@ -1162,6 +1162,53 @@ def foo(self): assert test.pipeline_hooks() == {'post_setup': [MyTest.foo]} +def test_pinned_hooks(): + @test_util.custom_prefix('unittests/resources/checks') + class X(rfm.RunOnlyRegressionTest): + @run_before('run', always_last=True) + def foo(self): + pass + + class Y(X): + @run_before('run') + def bar(self): + pass + + test = Y() + assert test.pipeline_hooks() == {'pre_run': [Y.bar, X.foo]} + + +def test_pinned_hooks_multiple_last(): + @test_util.custom_prefix('unittests/resources/checks') + class X(rfm.RunOnlyRegressionTest): + @run_before('run', always_last=True) + def foo(self): + pass + + class Y(X): + 
@run_before('run', always_last=True) + def bar(self): + pass + + with pytest.raises(ReframeSyntaxError): + test = Y() + + +def test_pinned_hooks_multiple_last_inherited(): + @test_util.custom_prefix('unittests/resources/checks') + class X(rfm.RunOnlyRegressionTest): + @run_before('run', always_last=True) + def foo(self): + pass + + @run_before('run', always_last=True) + def bar(self): + pass + + with pytest.raises(ReframeSyntaxError): + test = X() + + def test_disabled_hooks(HelloTest, local_exec_ctx): @test_util.custom_prefix('unittests/resources/checks') class BaseTest(HelloTest): From 36ba041152e5646e6007a83ca8c06eaf53407889 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 11 Sep 2023 22:12:22 +0200 Subject: [PATCH 33/61] Add another test hook to increase coverage --- unittests/test_pipeline.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/unittests/test_pipeline.py b/unittests/test_pipeline.py index cdaae8c8a9..56dfb457c2 100644 --- a/unittests/test_pipeline.py +++ b/unittests/test_pipeline.py @@ -1169,13 +1169,21 @@ class X(rfm.RunOnlyRegressionTest): def foo(self): pass + @run_after('sanity', always_last=True) + def fooX(self): + '''Check that a single `always_last` hook is registered + correctly.''' + class Y(X): @run_before('run') def bar(self): pass test = Y() - assert test.pipeline_hooks() == {'pre_run': [Y.bar, X.foo]} + assert test.pipeline_hooks() == { + 'pre_run': [Y.bar, X.foo], + 'post_sanity': [X.fooX] + } def test_pinned_hooks_multiple_last(): From eff04d2e78ecf857ac86af60df775b128ec2ad0a Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 11 Sep 2023 22:59:21 +0200 Subject: [PATCH 34/61] Pin version of flux scheduler in CI --- .github/workflows/test-flux.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-flux.yaml b/.github/workflows/test-flux.yaml index bf29b70362..e2a542d36c 100644 --- a/.github/workflows/test-flux.yaml +++ 
b/.github/workflows/test-flux.yaml @@ -10,7 +10,7 @@ jobs: strategy: fail-fast: false matrix: - container: ['fluxrm/flux-sched:focal'] + container: ['fluxrm/flux-sched:focal-v0.28.0'] container: image: ${{ matrix.container }} From b778a767c95d03afddb5b5ddc4ea28e4aa219ed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Mon, 11 Sep 2023 18:44:10 +0100 Subject: [PATCH 35/61] Check exit status of PBS Pro jobs --- reframe/core/schedulers/pbs.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/reframe/core/schedulers/pbs.py b/reframe/core/schedulers/pbs.py index bb93c0d150..e6193f29a3 100644 --- a/reframe/core/schedulers/pbs.py +++ b/reframe/core/schedulers/pbs.py @@ -178,7 +178,9 @@ def _update_nodelist(self, job, nodespec): job._nodelist = [x.split('/')[0] for x in nodespec.split('+')] job._nodelist.sort() - def poll(self, *jobs): + # The second argument is to specialise some code paths to PBS Pro only, but + # not Torque. + def _poll(self, is_pbs_pro, *jobs): def output_ready(job): # We report a job as finished only when its stdout/stderr are # written back to the working directory @@ -209,6 +211,19 @@ def output_ready(job): if job.cancelled or output_ready(job): self.log(f'Assuming job {job.jobid} completed') job._completed = True + if is_pbs_pro: + # With PBS Pro we can obtain the exit status of the job, + # in case it actually failed. 
+ extended_info = osext.run_command( + f'qstat -xf {job.jobid}' + ) + exit_status_match = re.search( + r'^ *Exit_status *= *(?P<exit_status>\d+)', + extended_info.stdout, + flags=re.MULTILINE, + ) + if exit_status_match: + job._exitcode = int(exit_status_match.group('exit_status')) return @@ -277,7 +292,13 @@ def output_ready(job): job._exception = JobError('maximum pending time exceeded', job.jobid) + def poll(self, *job): + self._poll(True, *job) + @register_scheduler('torque') class TorqueJobScheduler(PbsJobScheduler): TASKS_OPT = '-l nodes={num_nodes}:ppn={num_cpus_per_node}' + + def poll(self, *job): + self._poll(False, *job) From a4e8469dd6de8e1d543b7a7be1ff161458d49d93 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Tue, 12 Sep 2023 22:22:55 +0200 Subject: [PATCH 36/61] Bump patch level --- reframe/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/__init__.py b/reframe/__init__.py index 4c60888681..b954307b0e 100644 --- a/reframe/__init__.py +++ b/reframe/__init__.py @@ -6,7 +6,7 @@ import os import sys -VERSION = '4.3.2' +VERSION = '4.3.3' INSTALL_PREFIX = os.path.normpath( os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) ) From c6d5eb4913e8c66c8b74b8ca9ca4af648c4508ca Mon Sep 17 00:00:00 2001 From: Victor Holanda Date: Thu, 14 Sep 2023 09:17:09 +0200 Subject: [PATCH 37/61] Fix the ssh_host_keys hpclibtest The test references the self.host_keys variable but it should be self.ssh_host_keys instead --- hpctestlib/system/ssh/host_keys.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/hpctestlib/system/ssh/host_keys.py b/hpctestlib/system/ssh/host_keys.py index 642e054731..798ff1b053 100644 --- a/hpctestlib/system/ssh/host_keys.py +++ b/hpctestlib/system/ssh/host_keys.py @@ -14,9 +14,8 @@ class ssh_host_keys_check(rfm.RunOnlyRegressionTest): '''SSH host keys age check - The ssh host keys should be renewed regularly. 
- In this case, we are checking against the - max_key_age variable + The test checks if the list of SSH keys has been updated recently. + In this case, we are checking against the max_key_age variable ''' #: Parameter list with all host keys to check @@ -45,18 +44,18 @@ class ssh_host_keys_check(rfm.RunOnlyRegressionTest): @run_after('init') def set_hosts_keys(self): - self.executable_opts += [self.host_keys] + self.executable_opts += [self.ssh_host_keys] @sanity_function def assert_file_age(self): current_time = time.time() skip_me = sn.extractall('No such file or directory', self.stderr) - self.skip_if(skip_me, msg=f'Skipping test because {self.host_keys}' + self.skip_if(skip_me, msg=f'Skipping test because {self.ssh_host_keys}' f' was not found') return sn.assert_lt(current_time - sn.extractsingle(r'\d+', self.stdout, 0, int), typ.Duration(self.max_key_age), - msg=f'File {self.host_keys} is older than ' + msg=f'File {self.ssh_host_keys} is older than ' f'{self.max_key_age}') From e9875decd55752a5d76ec37bd51fbe77505265d5 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 13 Sep 2023 23:57:56 +0200 Subject: [PATCH 38/61] Respect execution order of overriden hooks --- reframe/core/hooks.py | 54 ++++++++++++++++++++++++-------------- reframe/core/meta.py | 5 ++-- unittests/test_pipeline.py | 20 ++++++++++++++ 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/reframe/core/hooks.py b/reframe/core/hooks.py index 4fde89322e..ffa12a208e 100644 --- a/reframe/core/hooks.py +++ b/reframe/core/hooks.py @@ -9,13 +9,21 @@ import reframe.utility as util +def is_hook(func): + return hasattr(func, '_rfm_attach') + + +def is_dep_hook(func): + return hasattr(func, '_rfm_resolve_deps') + + def attach_to(phase): '''Backend function to attach a hook to a given phase. 
:meta private: ''' def deco(func): - if hasattr(func, '_rfm_attach'): + if is_hook(func): func._rfm_attach.append(phase) else: func._rfm_attach = [phase] @@ -118,7 +126,7 @@ class Hook: def __init__(self, fn): self.__fn = fn - if not hasattr(fn, '_rfm_attach'): + if not is_hook(fn): raise ValueError(f'{fn.__name__} is not a hook') @property @@ -152,7 +160,7 @@ class HookRegistry: '''Global hook registry.''' def __init__(self, hooks=None): - self.__hooks = util.OrderedSet() + self.__hooks = [] if hooks is not None: self.update(hooks) @@ -172,30 +180,36 @@ def add(self, v): of the pipeline where they must be attached. Dependencies will be resolved first in the post-setup phase if not assigned elsewhere. ''' - - if hasattr(v, '_rfm_attach'): + if is_hook(v): # Always override hooks with the same name h = Hook(v) - self.__hooks.discard(h) - self.__hooks.add(h) - elif hasattr(v, '_rfm_resolve_deps'): + try: + pos = self.__hooks.index(h) + except ValueError: + self.__hooks.append(h) + else: + self.__hooks[pos] = h + elif is_dep_hook(v): v._rfm_attach = ['post_setup'] - self.__hooks.add(Hook(v)) - - def update(self, other, *, denied_hooks=None): - '''Update the hook registry with the hooks from another hook registry. + self.__hooks.append(Hook(v)) - The optional ``denied_hooks`` argument takes a set of disallowed - hook names, preventing their inclusion into the current hook registry. 
- ''' + def update(self, other, *, forbidden_names=None): + '''Update the hook registry with the hooks from another hook + registry.''' assert isinstance(other, HookRegistry) - denied_hooks = denied_hooks or set() + forbidden_names = forbidden_names or {} for h in other: - if h.__name__ not in denied_hooks: - # Hooks in `other` override ours - self.__hooks.discard(h) - self.__hooks.add(h) + if (h.__name__ in forbidden_names and + not is_hook(forbidden_names[h.__name__])): + continue + + try: + pos = self.__hooks.index(h) + except ValueError: + self.__hooks.append(h) + else: + self.__hooks[pos] = h def __repr__(self): return repr(self.__hooks) diff --git a/reframe/core/meta.py b/reframe/core/meta.py index 9654a53114..c35b8f4c2e 100644 --- a/reframe/core/meta.py +++ b/reframe/core/meta.py @@ -348,9 +348,8 @@ def __init__(cls, name, bases, namespace, **kwargs): # parent classes in reverse MRO order for c in list(reversed(cls.mro()))[:-1]: if hasattr(c, '_rfm_local_hook_registry'): - cls._rfm_hook_registry.update( - c._rfm_local_hook_registry, denied_hooks=namespace - ) + cls._rfm_hook_registry.update(c._rfm_local_hook_registry, + forbidden_names=namespace) cls._rfm_hook_registry.update(cls._rfm_local_hook_registry) diff --git a/unittests/test_pipeline.py b/unittests/test_pipeline.py index 0bb95574b9..bbe1cc0cf8 100644 --- a/unittests/test_pipeline.py +++ b/unittests/test_pipeline.py @@ -1162,6 +1162,26 @@ def foo(self): assert test.pipeline_hooks() == {'post_setup': [MyTest.foo]} +def test_overriden_hook_exec_order(): + @test_util.custom_prefix('unittests/resources/checks') + class X(rfm.RunOnlyRegressionTest): + @run_before('run') + def foo(self): + pass + + @run_before('run') + def bar(self): + pass + + class Y(X): + @run_before('run') + def foo(self): + pass + + test = Y() + assert test.pipeline_hooks() == {'pre_run': [Y.foo, X.bar]} + + def test_disabled_hooks(HelloTest, local_exec_ctx): @test_util.custom_prefix('unittests/resources/checks') class 
BaseTest(HelloTest): From bbfae469b7ffb61938dc7eddbb3037a611b68f82 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 15 Sep 2023 20:44:18 +0200 Subject: [PATCH 39/61] Remove unused imports --- reframe/core/hooks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/reframe/core/hooks.py b/reframe/core/hooks.py index ffa12a208e..832bc2ca80 100644 --- a/reframe/core/hooks.py +++ b/reframe/core/hooks.py @@ -6,8 +6,6 @@ import functools import inspect -import reframe.utility as util - def is_hook(func): return hasattr(func, '_rfm_attach') From b90e26a82ac804ff9dfe68ada1879d6d5969b7df Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sat, 16 Sep 2023 23:15:57 +0200 Subject: [PATCH 40/61] Update `filelog` log handler docs --- docs/config_reference.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index cbb9373032..4fe0619a16 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -1219,9 +1219,9 @@ The additional properties for the ``filelog`` handler are the following: {basedir}/ system1/ partition1/ - test_short_name.log + <test_class_name>.log partition2/ - test_short_name.log + <test_class_name>.log ... system2/ ... @@ -1241,6 +1241,12 @@ The additional properties for the ``filelog`` handler are the following: Examples of changes in the logged information are when the log record format changes or a new performance metric is added, deleted or has its name changed. This behavior guarantees that each log file is consistent and it will not break existing parsers. +.. versionchanged:: 4.3 + + In the generated log file, the name of the test class is used instead of the test's short name (which included the test's hash). + This allows the results of different variants of a parameterized test to be stored in the same log file facilitating post-processing. 
+ + The ``graylog`` log handler --------------------------- From 8c97dcb6683bf42f2e19adb0b6259b0b0517bcb9 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sat, 16 Sep 2023 23:38:10 +0200 Subject: [PATCH 41/61] Allow JSON encoding of `ScopedDict` --- reframe/utility/__init__.py | 3 +++ unittests/test_utility.py | 12 +++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/reframe/utility/__init__.py b/reframe/utility/__init__.py index 12edb01e9a..a381abeac5 100644 --- a/reframe/utility/__init__.py +++ b/reframe/utility/__init__.py @@ -1275,6 +1275,9 @@ def __delitem__(self, key): def __missing__(self, key): raise KeyError(str(key)) + def __rfm_json_encode__(self): + return self.data + @functools.total_ordering class OrderedSet(collections.abc.MutableSet): diff --git a/unittests/test_utility.py b/unittests/test_utility.py index b89236cea8..224d4dc0a9 100644 --- a/unittests/test_utility.py +++ b/unittests/test_utility.py @@ -799,7 +799,6 @@ def test_scoped_dict_construction(): 'a': {'k1': 3, 'k2': 4}, 'b': {'k3': 5} } - namespace_dict = reframe.utility.ScopedDict() namespace_dict = reframe.utility.ScopedDict(d) # Change local dict and verify that the stored values are not affected @@ -1088,6 +1087,17 @@ def test_scoped_dict_update(): assert scoped_dict == scoped_dict_alt +def test_scoped_dict_json_enc(): + import json + + d = { + 'a': {'k1': 3, 'k2': 4}, + 'b': {'k3': 5} + } + ns_dict = reframe.utility.ScopedDict(d) + assert d == json.loads(jsonext.dumps(ns_dict)) + + def test_sequence_view(): l = util.SequenceView([1, 2, 2]) assert 1 == l[0] From d566100b4d1b07508fedf3fa3b0b2b1000851a0e Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 18 Sep 2023 19:38:35 +0200 Subject: [PATCH 42/61] Always remove temp dir when remote auto-detection fails --- reframe/frontend/autodetect.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/reframe/frontend/autodetect.py b/reframe/frontend/autodetect.py index d4c75674f9..5bca62a933 100644 --- 
a/reframe/frontend/autodetect.py +++ b/reframe/frontend/autodetect.py @@ -59,6 +59,9 @@ def __enter__(self): shutil.copy2(src, self._workdir) except FileNotFoundError: use_pip = True + except Exception as err: + osext.rmtree(self._workdir) + raise err return self._workdir, use_pip From 722bca584069cf4e84886300380c8b293daa333c Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sat, 23 Sep 2023 00:07:00 +0200 Subject: [PATCH 43/61] Address PR comments --- reframe/core/schedulers/pbs.py | 42 ++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/reframe/core/schedulers/pbs.py b/reframe/core/schedulers/pbs.py index e6193f29a3..4c357f8e1b 100644 --- a/reframe/core/schedulers/pbs.py +++ b/reframe/core/schedulers/pbs.py @@ -178,9 +178,21 @@ def _update_nodelist(self, job, nodespec): job._nodelist = [x.split('/')[0] for x in nodespec.split('+')] job._nodelist.sort() - # The second argument is to specialise some code paths to PBS Pro only, but - # not Torque. - def _poll(self, is_pbs_pro, *jobs): + def _query_exit_code(self, job): + '''Try to retrieve the exit code of a past job.''' + + # With PBS Pro we can obtain the exit status of a past job + extended_info = osext.run_command(f'qstat -xf {job.jobid}') + exit_status_match = re.search( + r'^ *Exit_status *= *(?P<exit_status>\d+)', extended_info.stdout, + flags=re.MULTILINE, + ) + if exit_status_match: + return int(exit_status_match.group('exit_status')) + + return None + + def poll(self, *jobs): def output_ready(job): # We report a job as finished only when its stdout/stderr are # written back to the working directory @@ -211,19 +223,7 @@ def output_ready(job): if job.cancelled or output_ready(job): self.log(f'Assuming job {job.jobid} completed') job._completed = True
- extended_info = osext.run_command( f'qstat -xf {job.jobid}' ) - exit_status_match = re.search( - r'^ *Exit_status *= *(?P<exit_status>\d+)', - extended_info.stdout, - flags=re.MULTILINE, - ) - if exit_status_match: - job._exitcode = int(exit_status_match.group('exit_status')) + job._exitcode = self._query_exit_code(job) return @@ -292,13 +292,13 @@ def output_ready(job): job._exception = JobError('maximum pending time exceeded', job.jobid) - def poll(self, *job): - self._poll(True, *job) - @register_scheduler('torque') class TorqueJobScheduler(PbsJobScheduler): TASKS_OPT = '-l nodes={num_nodes}:ppn={num_cpus_per_node}' - def poll(self, *job): - self._poll(False, *job) + def _query_exit_code(self, job): + '''Try to retrieve the exit code of a past job.''' + + # Torque does not provide a way to retrieve the history of jobs + return None From 2adc65074a3b82d48f3eb04d4e795b428169428e Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Mon, 25 Sep 2023 13:34:33 +0200 Subject: [PATCH 44/61] Add hpctestlib to the pypi package Signed-off-by: Theofilos Manitaras --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ac7ef069f5..99019ad681 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,7 +34,7 @@ install_requires = semver [options.packages.find] -include = reframe,reframe.* +include = reframe,reframe.*,hpctestlib.* [options.package_data] reframe = schemas/* From 4f6fe9e1a779273dcf87257cc39fedcba4bdd71d Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Mon, 25 Sep 2023 22:46:22 +0200 Subject: [PATCH 45/61] Remove unused imports --- reframe/core/schedulers/ssh.py | 1 - 1 file changed, 1 deletion(-) diff --git a/reframe/core/schedulers/ssh.py b/reframe/core/schedulers/ssh.py index 41be9cfd04..5a789835ac 100644 --- a/reframe/core/schedulers/ssh.py +++ b/reframe/core/schedulers/ssh.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: BSD-3-Clause import os -import functools import time import reframe.utility.osext as osext From 
a70944a16bd9156df74b70a2b3b16aac6584f891 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Tue, 26 Sep 2023 00:02:27 +0200 Subject: [PATCH 46/61] Update docs --- docs/config_reference.rst | 9 +++++++-- reframe/core/schedulers/ssh.py | 2 +- reframe/schemas/config.json | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 2808244acc..39f925cbb0 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -273,7 +273,7 @@ System Partition Configuration This backend does not rely on job accounting to retrieve job statuses, but ReFrame does its best to query the job state as reliably as possible. - ``ssh``: Jobs will be launched on a remote host using SSH. - The remote host will be selected from the list of hosts specified in :attr:`~systems.partitions.sched_options.hosts`. + The remote host will be selected from the list of hosts specified in :attr:`~systems.partitions.sched_options.ssh_hosts`. The scheduler keeps track of the hosts that it has submitted jobs to, and it will select the next available one in a round-robin fashion. For connecting to a remote host, the options specified in :attr:`~systems.partitions.access` will be used. @@ -283,6 +283,11 @@ System Partition Configuration The same :attr:`~systems.partitions.access` options will be used in those operations as well. Please note, that the connection options of ``ssh`` and ``scp`` differ and ReFrame will not attempt to translate any options between the two utilities in case ``scp`` is selected for copying to the remote host. In this case, it is preferable to set up the host connection options in ``~/.ssh/config`` and leave :attr:`~systems.partition.access` blank. + + Job-scheduler command line options can be used to interact with the ``ssh`` backend. + More specifically, if the :option:`--distribute` option is used, a test will be generated for each host listed in :attr:`~systems.partitions.sched_options.ssh_hosts`. 
+ You can also pin a test to a specific host if you pass the ``#host`` directive to the :option:`-J` option, e.g., ``-J '#host=myhost'``. + - ``torque``: Jobs will be launched using the `Torque `__ scheduler. .. versionadded:: 3.7.2 @@ -352,7 +357,7 @@ System Partition Configuration .. warning:: This option is broken in 4.0. -.. py:attribute:: systems.partitions.sched_options.hosts +.. py:attribute:: systems.partitions.sched_options.ssh_hosts :required: No :default: ``[]`` diff --git a/reframe/core/schedulers/ssh.py b/reframe/core/schedulers/ssh.py index 5a789835ac..3984b60a46 100644 --- a/reframe/core/schedulers/ssh.py +++ b/reframe/core/schedulers/ssh.py @@ -43,7 +43,7 @@ def ssh_options(self): @register_scheduler('ssh') class SSHJobScheduler(JobScheduler): def __init__(self): - self._free_hosts = set(self.get_option('hosts')) + self._free_hosts = set(self.get_option('ssh_hosts')) self._allocated_hosts = set() if not self._free_hosts: raise ConfigError(f'no hosts specified for the SSH scheduler: ' diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index ee71272deb..0cc8ff1ffa 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -619,7 +619,7 @@ "systems/partitions/time_limit": null, "systems/partitions/devices": [], "systems/partitions/extras": {}, - "systems/*/sched_options/hosts": [], + "systems/*/sched_options/ssh_hosts": [], "systems*/sched_options/ignore_reqnodenotavail": false, "systems*/sched_options/job_submit_timeout": 60, "systems*/sched_options/resubmit_on_errors": [], From fe144af7e470357be441510addbdb876f3380a37 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 28 Sep 2023 22:46:58 +0200 Subject: [PATCH 47/61] Coding style fixes --- reframe/core/schedulers/__init__.py | 1 + reframe/core/schedulers/ssh.py | 3 ++- reframe/utility/osext.py | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index 
781bb031bb..a5daee11f2 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -628,6 +628,7 @@ def in_state(self, state): :class:`False` otherwise. ''' + class AlwaysIdleNode(Node): def __init__(self, name): self._name = name diff --git a/reframe/core/schedulers/ssh.py b/reframe/core/schedulers/ssh.py index 3984b60a46..0e1dc9137b 100644 --- a/reframe/core/schedulers/ssh.py +++ b/reframe/core/schedulers/ssh.py @@ -78,7 +78,8 @@ def _push_artefacts(self, job): assert isinstance(job, _SSHJob) options = ' '.join(job.ssh_options) - # Create a temporary directory on the remote host and push the job artifacts + # Create a temporary directory on the remote host and push the job + # artifacts completed = osext.run_command( f'ssh -o BatchMode=yes {options} {job.host} ' f'mktemp -td rfm.XXXXXXXX', check=True diff --git a/reframe/utility/osext.py b/reframe/utility/osext.py index ccc2d90365..53d4065158 100644 --- a/reframe/utility/osext.py +++ b/reframe/utility/osext.py @@ -43,7 +43,6 @@ class _ProcFuture: :meta public: .. 
versionadded:: 4.4 - ''' def __init__(self, check=False, *args, **kwargs): From dea4751cffcb66cd009a133f6a73d3a6051e48a9 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 28 Sep 2023 23:42:38 +0200 Subject: [PATCH 48/61] Add unit tests --- reframe/core/schedulers/ssh.py | 4 ++-- reframe/schemas/config.json | 2 +- unittests/test_schedulers.py | 28 +++++++++++++++++++++++++--- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/reframe/core/schedulers/ssh.py b/reframe/core/schedulers/ssh.py index 0e1dc9137b..03e35c6a15 100644 --- a/reframe/core/schedulers/ssh.py +++ b/reframe/core/schedulers/ssh.py @@ -42,8 +42,8 @@ def ssh_options(self): @register_scheduler('ssh') class SSHJobScheduler(JobScheduler): - def __init__(self): - self._free_hosts = set(self.get_option('ssh_hosts')) + def __init__(self, *, hosts=None): + self._free_hosts = set(hosts or self.get_option('ssh_hosts')) self._allocated_hosts = set() if not self._free_hosts: raise ConfigError(f'no hosts specified for the SSH scheduler: ' diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index 0cc8ff1ffa..70926f87cb 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -619,7 +619,7 @@ "systems/partitions/time_limit": null, "systems/partitions/devices": [], "systems/partitions/extras": {}, - "systems/*/sched_options/ssh_hosts": [], + "systems*/sched_options/ssh_hosts": [], "systems*/sched_options/ignore_reqnodenotavail": false, "systems*/sched_options/job_submit_timeout": 60, "systems*/sched_options/resubmit_on_errors": [], diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 797fecd2b2..c38914c972 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -26,8 +26,8 @@ def launcher(): return getlauncher('local') -@pytest.fixture(params=['flux', 'local', 'lsf', 'oar', - 'pbs', 'sge', 'slurm', 'squeue', 'torque']) +@pytest.fixture(params=['flux', 'local', 'lsf', 'oar', 'pbs', + 'sge', 'slurm', 'ssh', 
'squeue', 'torque']) def scheduler(request): try: return getscheduler(request.param) @@ -73,7 +73,13 @@ def exec_ctx(make_exec_ctx, scheduler): @pytest.fixture def make_job(scheduler, launcher, tmp_path): def _make_job(sched_opts=None, **jobargs): - sched = scheduler(**sched_opts) if sched_opts else scheduler() + if sched_opts: + sched = scheduler(**sched_opts) + elif scheduler.registered_name == 'ssh': + sched = scheduler(hosts=['localhost']) + else: + sched = scheduler() + return Job.create( sched, launcher(), name='testjob', @@ -361,6 +367,18 @@ def _expected_local_directives_no_tasks(job): return set() +def _expected_ssh_directives(job): + return set() + + +def _expected_ssh_directives_minimal(job): + return set() + + +def _expected_ssh_directives_no_tasks(job): + return set() + + def test_prepare(fake_job): sched_name = fake_job.scheduler.registered_name if sched_name == 'pbs': @@ -649,6 +667,10 @@ def test_guess_num_tasks(minimal_job, scheduler): # of the default partition through the use of `scontrol show` minimal_job.scheduler._get_default_partition = lambda: 'pdef' assert minimal_job.guess_num_tasks() == 0 + elif scheduler.registered_name == 'ssh': + minimal_job.num_tasks = 0 + minimal_job._sched_flex_alloc_nodes = 'all' + assert minimal_job.guess_num_tasks() == 1 else: with pytest.raises(NotImplementedError): minimal_job.guess_num_tasks() From 716f8517cb5027aa2ae80d67bb068cf0fd819927 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 28 Sep 2023 23:44:50 +0200 Subject: [PATCH 49/61] Coding style fixes --- reframe/utility/osext.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reframe/utility/osext.py b/reframe/utility/osext.py index 53d4065158..c6cd32f553 100644 --- a/reframe/utility/osext.py +++ b/reframe/utility/osext.py @@ -150,7 +150,8 @@ def then(self, future, when=None): ''' if when is None: - def when(fut): return True + def when(fut): + return True if not util.is_trivially_callable(when, non_def_args=1): raise 
ValueError("the 'when' function must " From a6cdc013f4d39d535bf2e2200acb10bb532a0e65 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 29 Sep 2023 22:43:03 +0200 Subject: [PATCH 50/61] Treat properly cases when `rsync` is not available --- reframe/core/schedulers/ssh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/core/schedulers/ssh.py b/reframe/core/schedulers/ssh.py index 03e35c6a15..e1d7727d40 100644 --- a/reframe/core/schedulers/ssh.py +++ b/reframe/core/schedulers/ssh.py @@ -52,7 +52,7 @@ def __init__(self, *, hosts=None): # Determine if rsync is available try: osext.run_command('rsync --version', check=True) - except SpawnedProcessError: + except (FileNotFoundError, SpawnedProcessError): self._has_rsync = False else: self._has_rsync = True From 9f22dc89397f740c52c193a36967638069b3004d Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sat, 30 Sep 2023 00:42:24 +0200 Subject: [PATCH 51/61] Bump patch level --- reframe/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/__init__.py b/reframe/__init__.py index b954307b0e..3332054761 100644 --- a/reframe/__init__.py +++ b/reframe/__init__.py @@ -6,7 +6,7 @@ import os import sys -VERSION = '4.3.3' +VERSION = '4.3.4' INSTALL_PREFIX = os.path.normpath( os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) ) From 720dc4636bcdb8c6c464011ba70234b469086884 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Oct 2023 15:57:11 +0000 Subject: [PATCH 52/61] Bump setuptools from 59.6.0 to 68.2.2 Bumps [setuptools](https://github.com/pypa/setuptools) from 59.6.0 to 68.2.2. 
- [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v59.6.0...v68.2.2) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8e2bffa6b5..dac23b17e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,6 @@ semver==2.13.0; python_version == '3.6' semver==3.0.1; python_version >= '3.7' setuptools==59.6.0; python_version == '3.6' setuptools==68.0.0; python_version == '3.7' -setuptools==68.1.2; python_version >= '3.8' +setuptools==68.2.2; python_version >= '3.8' wcwidth==0.2.6 #+pygelf%pygelf==0.4.0 From cccaa1a33125a2eb880ced74ca633e2e0316cacb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Oct 2023 22:06:12 +0000 Subject: [PATCH 53/61] Bump wcwidth from 0.2.6 to 0.2.8 Bumps [wcwidth](https://github.com/jquast/wcwidth) from 0.2.6 to 0.2.8. - [Release notes](https://github.com/jquast/wcwidth/releases) - [Commits](https://github.com/jquast/wcwidth/compare/0.2.6...0.2.8) --- updated-dependencies: - dependency-name: wcwidth dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index dac23b17e7..97a8c221dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,5 +17,5 @@ semver==3.0.1; python_version >= '3.7' setuptools==59.6.0; python_version == '3.6' setuptools==68.0.0; python_version == '3.7' setuptools==68.2.2; python_version >= '3.8' -wcwidth==0.2.6 +wcwidth==0.2.8 #+pygelf%pygelf==0.4.0 From 5d41cfa4e595c277f29e310eaf6cabd812a9913b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Oct 2023 07:35:38 +0000 Subject: [PATCH 54/61] Bump argcomplete from 3.1.1 to 3.1.2 Bumps [argcomplete](https://github.com/kislyuk/argcomplete) from 3.1.1 to 3.1.2. - [Release notes](https://github.com/kislyuk/argcomplete/releases) - [Changelog](https://github.com/kislyuk/argcomplete/blob/develop/Changes.rst) - [Commits](https://github.com/kislyuk/argcomplete/compare/v3.1.1...v3.1.2) --- updated-dependencies: - dependency-name: argcomplete dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 97a8c221dc..3aaa6d9842 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ archspec==0.2.1 -argcomplete==3.1.1 +argcomplete==3.1.2 importlib_metadata==4.0.1; python_version < '3.8' jsonschema==3.2.0 lxml==4.9.3 From cc07778446eab7ad2dad15d9e7a8eec09be89ee5 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 21 Sep 2023 23:00:27 +0200 Subject: [PATCH 55/61] Filter tests using arbitrary expressions --- docs/manpage.rst | 21 +++++++++++++++++++++ reframe/frontend/cli.py | 18 ++++++++++++++++++ reframe/frontend/filters.py | 17 ++++++++++------- unittests/test_filters.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 7 deletions(-) diff --git a/docs/manpage.rst b/docs/manpage.rst index ee2348cdb7..3b4585389d 100644 --- a/docs/manpage.rst +++ b/docs/manpage.rst @@ -68,6 +68,21 @@ This happens recursively so that if test ``T1`` depends on ``T2`` and ``T2`` dep The value of this attribute is not required to be non-zero for GPU tests. Tests may or may not make use of it. + .. deprecated:: 4.4 + + Please use ``-E 'not num_gpus_per_node'`` instead. + +.. option:: -E, --filter-expr=EXPR + + Select only tests that satisfy the given expression. + + The expression ``EXPR`` can be any valid Python expression on the test variables or parameters. + For example, ``-E num_tasks > 10`` will select all tests, whose :attr:`~reframe.core.pipeline.RegressionTest.num_tasks` exceeds ``10``. + You may use any test variable in expression, even user-defined. + Multiple variables can also be included such as ``-E num_tasks >= my_param``, where ``my_param`` is user-defined parameter. + + .. versionadded:: 4.4 + .. option:: --failed Select only the failed test cases for a previous run. @@ -77,6 +92,7 @@ This happens recursively so that if test ``T1`` depends on ``T2`` and ``T2`` dep .. 
versionadded:: 3.4 + .. option:: --gpu-only Select tests that can run on GPUs. @@ -84,6 +100,10 @@ This happens recursively so that if test ``T1`` depends on ``T2`` and ``T2`` dep These are all tests with :attr:`num_gpus_per_node` greater than zero. This option and :option:`--cpu-only` are mutually exclusive. + .. deprecated:: 4.4 + + Please use ``-E num_gpus_per_node`` instead. + .. option:: --maintainer=MAINTAINER Filter tests by maintainer. @@ -101,6 +121,7 @@ This happens recursively so that if test ``T1`` depends on ``T2`` and ``T2`` dep The ``MAINTAINER`` pattern is matched anywhere in the maintainer's name and not at its beginning. If you want to match at the beginning of the name, you should prepend ``^``. + .. option:: -n, --name=NAME Filter tests by name. diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py index 7b4ea8260f..8c59a68019 100644 --- a/reframe/frontend/cli.py +++ b/reframe/frontend/cli.py @@ -356,6 +356,10 @@ def main(): metavar='PATTERN', default=[], help='Exclude checks whose name matches PATTERN' ) + select_options.add_argument( + '-E', '--filter-expr', action='store', metavar='EXPR', + help='Select checks that satisfy the expression EXPR' + ) # Action options action_options.add_argument( @@ -1048,6 +1052,16 @@ def print_infoline(param, value): f'Filtering test cases(s) by tags: {len(testcases)} remaining' ) + if options.filter_expr: + testcases = filter(filters.validates(options.filter_expr), + testcases) + + testcases = list(testcases) + printer.verbose( + f'Filtering test cases(s) by {options.filter_expr}: ' + f'{len(testcases)} remaining' + ) + # Filter test cases by maintainers for maint in options.maintainers: testcases = filter(filters.have_maintainer(maint), testcases) @@ -1059,8 +1073,12 @@ def print_infoline(param, value): sys.exit(1) if options.gpu_only: + printer.warning('the `--gpu-only` option is deprecated; ' + 'please use `-E num_gpus_per_node` instead') testcases = filter(filters.have_gpu_only(), testcases) elif 
options.cpu_only: + printer.warning('the `--cpu-only` option is deprecated; ' + 'please use `-E "not num_gpus_per_node"` instead') testcases = filter(filters.have_cpu_only(), testcases) testcases = list(testcases) diff --git a/reframe/frontend/filters.py b/reframe/frontend/filters.py index f6e80741cf..60ae9c859e 100644 --- a/reframe/frontend/filters.py +++ b/reframe/frontend/filters.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: BSD-3-Clause import re +from collections import namedtuple from reframe.core.exceptions import ReframeError from reframe.core.runtime import runtime @@ -109,16 +110,18 @@ def _fn(case): def have_gpu_only(): - def _fn(case): - # NOTE: This takes into account num_gpus_per_node being None - return case.check.num_gpus_per_node - - return _fn + return validates('num_gpus_per_node') def have_cpu_only(): + return validates('not num_gpus_per_node') + + +def validates(expr): def _fn(case): - # NOTE: This takes into account num_gpus_per_node being None - return not case.check.num_gpus_per_node + try: + return eval(expr, None, case.check.__dict__) + except Exception as err: + raise ReframeError(f'invalid expression `{expr}`') from err return _fn diff --git a/unittests/test_filters.py b/unittests/test_filters.py index 1a06c2f8e7..30e91b2382 100644 --- a/unittests/test_filters.py +++ b/unittests/test_filters.py @@ -11,6 +11,7 @@ import reframe.frontend.filters as filters import reframe.utility.sanity as sn import unittests.utility as test_util +from reframe.core.exceptions import ReframeError def count_checks(filter_fn, checks): @@ -140,3 +141,30 @@ def test_invalid_regex(sample_cases): with pytest.raises(errors.ReframeError): count_checks(filters.have_tag('*foo'), sample_cases).evaluate() + + +def test_validates_expr(sample_cases, sample_param_cases): + validates = filters.validates + assert count_checks(validates('"a" in tags'), sample_cases) == 2 + assert count_checks(validates('num_gpus_per_node == 1'), sample_cases) == 2 + assert 
count_checks(validates('p > 5'), sample_param_cases) == 5 + assert count_checks(validates('p > 5 or p < 1'), sample_param_cases) == 6 + assert count_checks(validates('num_tasks in tags'), sample_cases) == 0 + + +def test_validates_expr_invalid(sample_cases): + validates = filters.validates + + # undefined variables + with pytest.raises(ReframeError): + assert count_checks(validates('foo == 3'), sample_cases) + + # invalid syntax + with pytest.raises(ReframeError): + assert count_checks(validates('num_tasks = 2'), sample_cases) + + with pytest.raises(ReframeError): + assert count_checks(validates('import os'), sample_cases) + + with pytest.raises(ReframeError): + assert count_checks(validates('"foo" i tags'), sample_cases) From 238edee780ba4f4b4c6066ee23c260fe048c3992 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Tue, 3 Oct 2023 22:26:25 +0200 Subject: [PATCH 56/61] Remove unused imports --- reframe/frontend/filters.py | 1 - 1 file changed, 1 deletion(-) diff --git a/reframe/frontend/filters.py b/reframe/frontend/filters.py index 60ae9c859e..2bcc105209 100644 --- a/reframe/frontend/filters.py +++ b/reframe/frontend/filters.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: BSD-3-Clause import re -from collections import namedtuple from reframe.core.exceptions import ReframeError from reframe.core.runtime import runtime From 8ac037097e1cfed6ababfcf3fbff09ebb96132bd Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Tue, 3 Oct 2023 22:47:07 +0200 Subject: [PATCH 57/61] Add CLI unit tests --- unittests/test_cli.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/unittests/test_cli.py b/unittests/test_cli.py index 14ba5fc540..d5d8f148e5 100644 --- a/unittests/test_cli.py +++ b/unittests/test_cli.py @@ -698,6 +698,42 @@ def test_filtering_exclude_hash(run_reframe): assert returncode == 0 +def test_filtering_cpu_only(run_reframe): + returncode, stdout, stderr = run_reframe( + 
checkpath=['unittests/resources/checks/hellocheck.py'], + action='list', + more_options=['--cpu-only'] + ) + assert 'Traceback' not in stdout + assert 'Traceback' not in stderr + assert 'Found 2 check(s)' in stdout + assert returncode == 0 + + +def test_filtering_gpu_only(run_reframe): + returncode, stdout, stderr = run_reframe( + checkpath=['unittests/resources/checks/hellocheck.py'], + action='list', + more_options=['--gpu-only'] + ) + assert 'Traceback' not in stdout + assert 'Traceback' not in stderr + assert 'Found 0 check(s)' in stdout + assert returncode == 0 + + +def test_filtering_by_expr(run_reframe): + returncode, stdout, stderr = run_reframe( + checkpath=['unittests/resources/checks/hellocheck.py'], + action='list', + more_options=['-E num_tasks==1'] + ) + assert 'Traceback' not in stdout + assert 'Traceback' not in stderr + assert 'Found 2 check(s)' in stdout + assert returncode == 0 + + def test_show_config_all(run_reframe): # Just make sure that this option does not make the frontend crash returncode, stdout, stderr = run_reframe( From 0d60cc089fcaadd28ff69dbaeaca521cebe1720e Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 6 Oct 2023 19:39:28 +0300 Subject: [PATCH 58/61] Bump dev version --- reframe/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/__init__.py b/reframe/__init__.py index 171cdbbcf9..d13291f0ae 100644 --- a/reframe/__init__.py +++ b/reframe/__init__.py @@ -6,7 +6,7 @@ import os import sys -VERSION = '4.4.0-dev.4' +VERSION = '4.5.0-dev.0' INSTALL_PREFIX = os.path.normpath( os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) ) From e039eb74cf08e1e22b6ae7b714553d8f6e75b64e Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Tue, 17 Oct 2023 23:01:04 +0200 Subject: [PATCH 59/61] Validate docs everywhere --- .github/workflows/main.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 
0ebd08eb5d..7948b70a99 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -140,12 +140,15 @@ jobs: docvalidation: runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 - - name: Setup up Python 3.8 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: ${{ matrix.python-version }} - name: Install Doc Requirements run: | python -m pip install -r docs/requirements.txt From 788f25e708f53181efe868e156ced9be26c41eda Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Tue, 17 Oct 2023 23:06:23 +0200 Subject: [PATCH 60/61] Validate docs in Python 3.6 --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7948b70a99..39201daf71 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,9 +44,9 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Install dependencies and docs run: | - ./bootstrap.sh + ./bootstrap.sh +docs - name: Generic Unittests run: | ./test_reframe.py From 3fb7045f683e7f5d01e4a60bd41af274b2a2f960 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Sat, 21 Oct 2023 21:17:30 +0200 Subject: [PATCH 61/61] Update doc dependencies --- docs/requirements.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 7a49945ba2..6d63d93f98 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,9 @@ archspec==0.2.1 -docutils==0.17.1 # https://github.com/sphinx-doc/sphinx/issues/9001 +docutils==0.18.1 jsonschema==3.2.0 semver==2.13.0; python_version == '3.6' semver==3.0.1; python_version >= '3.7' -Sphinx==5.3.0 -sphinx-rtd-theme==1.2.2 +Sphinx==5.3.0; python_version < '3.8' +Sphinx==7.1.2; 
python_version == '3.8' +Sphinx==7.2.6; python_version >= '3.9' +sphinx-rtd-theme==1.3.0