diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 54abae68..0ce7a9b8 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,5 +1,5 @@ -.. Created by changelog.py at 2024-12-18, command - '/Users/giffler/.cache/pre-commit/repoecmh3ah8/py_env-python3.12/bin/changelog docs/source/changes compile --categories Added Changed Fixed Security Deprecated --output=docs/source/changelog.rst' +.. Created by changelog.py at 2025-02-04, command + '/Users/giffler/.cache/pre-commit/repoecmh3ah8/py_env-python3.13/bin/changelog docs/source/changes compile --categories Added Changed Fixed Security Deprecated --output=docs/source/changelog.rst' based on the format of 'https://keepachangelog.com/' ######### diff --git a/tardis/adapters/sites/htcondor.py b/tardis/adapters/sites/htcondor.py index d6552e30..5b64ec82 100644 --- a/tardis/adapters/sites/htcondor.py +++ b/tardis/adapters/sites/htcondor.py @@ -176,12 +176,14 @@ async def _condor_tool( except CommandExecutionFailure as cef: # the tool fails if none of the jobs are found – because they all just shut down # report graceful failure for all - if cef.exit_code == 1 and "not found" in cef.stderr: + handle_error_msgs = ("not found", "not running to be") + if cef.exit_code == 1 and any(msg in cef.stderr for msg in handle_error_msgs): return [False] * len(resource_attributes) raise # successes are in stdout, failures in stderr, both in argument order # stdout: Job 15540.0 marked for removal # stderr: Job 15612.0 not found + # stderr: Job 15611.0 not running to be suspended # stderr: Job 15535.0 marked for removal success_jobs = { TOOL_ID_PATTERN.search(line).group(1) diff --git a/tests/adapters_t/sites_t/test_htcondorsiteadapter.py b/tests/adapters_t/sites_t/test_htcondorsiteadapter.py index d44cf59f..07709881 100644 --- a/tests/adapters_t/sites_t/test_htcondorsiteadapter.py +++ b/tests/adapters_t/sites_t/test_htcondorsiteadapter.py @@ -50,7 +50,10 @@ CONDOR_RM_FAILED_MESSAGE = "Run command condor_rm 1351043.0 via ShellExecutor failed" CONDOR_SUSPEND_OUTPUT = """Job 1351043.0 suspended""" -CONDOR_SUSPEND_FAILED_OUTPUT = """Job 1351043.0 not found""" +CONDOR_SUSPEND_FAILED_OUTPUT_NOT_FOUND = """Job 1351043.0 not found""" +CONDOR_SUSPEND_FAILED_OUTPUT_NOT_RUNNING = ( + """Job 1351043.0 not running to be suspended""" +) CONDOR_SUSPEND_FAILED_MESSAGE = """Run command condor_suspend 1351043 via ShellExecutor failed""" @@ -379,12 +382,29 @@ def test_stop_resource(self): raise_exception=CommandExecutionFailure( message=CONDOR_SUSPEND_FAILED_MESSAGE, exit_code=1, - stderr=CONDOR_SUSPEND_FAILED_OUTPUT, + stderr=CONDOR_SUSPEND_FAILED_OUTPUT_NOT_FOUND, stdout="", stdin="", ), ) - def test_stop_resource_failed_redo(self): + def test_stop_resource_failed_redo_not_found(self): + with self.assertRaises(TardisResourceStatusUpdateFailed): + run_async( + self.adapter.stop_resource, + AttributeDict(remote_resource_uuid="1351043.0"), + ) + + @mock_executor_run_command( + stdout="", + raise_exception=CommandExecutionFailure( + message=CONDOR_SUSPEND_FAILED_MESSAGE, + exit_code=1, + stderr=CONDOR_SUSPEND_FAILED_OUTPUT_NOT_RUNNING, + stdout="", + stdin="", + ), + ) + def test_stop_resource_failed_redo_not_running(self): with self.assertRaises(TardisResourceStatusUpdateFailed): run_async( self.adapter.stop_resource, @@ -396,7 +416,7 @@ def test_stop_resource_failed_redo(self): raise_exception=CommandExecutionFailure( message=CONDOR_SUSPEND_FAILED_MESSAGE, exit_code=2, - stderr=CONDOR_SUSPEND_FAILED_OUTPUT, + stderr=CONDOR_SUSPEND_FAILED_OUTPUT_NOT_FOUND, stdout="", stdin="", ),