diff --git a/docs/changelog.md b/docs/changelog.md
index 508613fab..bd25a544c 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,9 @@
 # Changelog
 
+### 2024-12-05
+
+- Bugfix to no longer stop the reconciliation when a runner's health check fails.
+
 ### 2024-12-04
 
 - Clean up corresponding OpenStack runner resources when a unit of the charm is removed.
diff --git a/github-runner-manager/src-docs/errors.md b/github-runner-manager/src-docs/errors.md
index ec864dc62..941b39cd3 100644
--- a/github-runner-manager/src-docs/errors.md
+++ b/github-runner-manager/src-docs/errors.md
@@ -260,3 +260,14 @@ Base class for all reconcile errors.
 
 
 
+---
+
+<a href="../../github-runner-manager/src/github_runner_manager/errors.py#L100"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+
+## <kbd>class</kbd> `OpenstackHealthCheckError`
+Base class for all health check errors. 
+
+
+
+
+
diff --git a/github-runner-manager/src-docs/manager.cloud_runner_manager.md b/github-runner-manager/src-docs/manager.cloud_runner_manager.md
index 4fad301b9..14ab97e8b 100644
--- a/github-runner-manager/src-docs/manager.cloud_runner_manager.md
+++ b/github-runner-manager/src-docs/manager.cloud_runner_manager.md
@@ -28,7 +28,7 @@ Health state of the runners.
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L39"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L53"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ## <kbd>class</kbd> `CloudRunnerState`
 Represent state of the instance hosting the runner. 
@@ -51,7 +51,7 @@ Represent state of the instance hosting the runner.
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L97"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L111"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ## <kbd>class</kbd> `CloudInitStatus`
 Represents the state of cloud-init script. 
@@ -77,7 +77,7 @@ Refer to the official documentation on cloud-init status: https://cloudinit.read
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L124"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L138"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ## <kbd>class</kbd> `GitHubRunnerConfig`
 Configuration for GitHub runner spawned. 
@@ -107,7 +107,7 @@ __init__(github_path: GitHubOrg | GitHubRepo, labels: list[str]) → None
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L137"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L151"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ## <kbd>class</kbd> `SupportServiceConfig`
 Configuration for supporting services for runners. 
@@ -144,7 +144,7 @@ __init__(
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L154"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L168"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ## <kbd>class</kbd> `CloudRunnerInstance`
 Information on the runner on the cloud. 
@@ -181,7 +181,7 @@ __init__(
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L171"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L185"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ## <kbd>class</kbd> `CloudRunnerManager`
 Manage runner instance on cloud. 
@@ -203,7 +203,7 @@ Get the name prefix of the self-hosted runners.
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L227"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L241"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>method</kbd> `cleanup`
 
@@ -223,7 +223,7 @@ Perform health check on runner and delete the runner if it fails.
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L183"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L197"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>method</kbd> `create_runner`
 
@@ -241,7 +241,7 @@ Create a self-hosted runner.
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L208"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L222"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>method</kbd> `delete_runner`
 
@@ -260,7 +260,7 @@ Delete self-hosted runner.
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L217"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L231"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>method</kbd> `flush_runners`
 
@@ -279,7 +279,7 @@ Stop all runners.
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L191"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L205"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>method</kbd> `get_runner`
 
@@ -297,7 +297,7 @@ Get a self-hosted runner by instance id.
 
 ---
 
-<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L199"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../../github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py#L213"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>method</kbd> `get_runners`
 
diff --git a/github-runner-manager/src/github_runner_manager/errors.py b/github-runner-manager/src/github_runner_manager/errors.py
index d358ea1d6..3aa229df9 100644
--- a/github-runner-manager/src/github_runner_manager/errors.py
+++ b/github-runner-manager/src/github_runner_manager/errors.py
@@ -95,3 +95,7 @@ class KeyfileError(SSHError):
 
 class ReconcileError(Exception):
     """Base class for all reconcile errors."""
+
+
+class OpenstackHealthCheckError(Exception):
+    """Base class for all health check errors."""
diff --git a/github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py b/github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py
index e9d25cd5d..a8d9eacb0 100644
--- a/github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py
+++ b/github-runner-manager/src/github_runner_manager/manager/cloud_runner_manager.py
@@ -35,6 +35,20 @@ class HealthState(Enum):
     UNHEALTHY = auto()
     UNKNOWN = auto()
 
+    @staticmethod
+    def from_value(health: bool | None) -> "HealthState":
+        """Create a health state from a health check result.
+
+        Args:
+            health: True if healthy, False if unhealthy, None if it could not be determined.
+
+        Returns:
+            HEALTHY for a truthy value, UNHEALTHY for a falsy value, UNKNOWN for None.
+        """
+        if health is None:
+            return HealthState.UNKNOWN
+        return HealthState.HEALTHY if health else HealthState.UNHEALTHY
+
 
 class CloudRunnerState(str, Enum):
     """Represent state of the instance hosting the runner.
diff --git a/github-runner-manager/src/github_runner_manager/openstack_cloud/health_checks.py b/github-runner-manager/src/github_runner_manager/openstack_cloud/health_checks.py
index 754a48669..4a241dc51 100644
--- a/github-runner-manager/src/github_runner_manager/openstack_cloud/health_checks.py
+++ b/github-runner-manager/src/github_runner_manager/openstack_cloud/health_checks.py
@@ -9,7 +9,7 @@
 import invoke
 from fabric import Connection as SSHConnection
 
-from github_runner_manager.errors import KeyfileError, SSHError
+from github_runner_manager.errors import KeyfileError, OpenstackHealthCheckError, SSHError
 from github_runner_manager.manager.cloud_runner_manager import CloudInitStatus, CloudRunnerState
 from github_runner_manager.openstack_cloud.constants import (
     METRICS_EXCHANGE_PATH,
@@ -26,6 +26,10 @@
 _HealthCheckResult = bool | None  # None indicates that the check can not determine health status
 
 
+class _SSHError(Exception):
+    """Error on getting an SSH connection or executing an SSH command."""
+
+
 def check_runner(openstack_cloud: OpenstackCloud, instance: OpenstackInstance) -> bool:
     """Run a general health check on a runner instance.
 
@@ -47,9 +51,17 @@ def check_runner(openstack_cloud: OpenstackCloud, instance: OpenstackInstance) -
         logger.exception(
             "Health check failed due to unable to find keyfile for %s", instance.server_name
         )
+        # KeyfileError indicates that we'll never be able to ssh into the unit,
+        # so we mark it as unhealthy.
         return False
-    except SSHError:
-        logger.exception("SSH Failed on %s, marking as unhealthy.")
+    except _SSHError:
+        logger.exception(
+            "Unable to get SSH connection for instance %s, marking as unhealthy.",
+            instance.server_name,
+        )
+        # We assume that the failure to get the SSH connection is not transient, and mark
+        # the runner as unhealthy.
+        # It is debatable whether we should throw an exception here instead.
         return False
 
     return check_active_runner(ssh_conn, instance)
@@ -69,23 +81,31 @@ def check_active_runner(
             the flag, the health check would fail as it checks for running processes
             which would not be present in this case.
 
+    Raises:
+        OpenstackHealthCheckError: If the health check could not be completed.
+
     Returns:
         Whether the runner should be considered healthy.
     """
-    if (check_ok := _run_health_check_runner_installed(ssh_conn, instance)) is not None:
-        return check_ok
-
-    if (
-        check_ok := _run_health_check_cloud_init(
-            ssh_conn, instance.server_name, accept_finished_job
-        )
-    ) is not None:
-        return check_ok
+    try:
+        if (check_ok := _run_health_check_runner_installed(ssh_conn, instance)) is not None:
+            return check_ok
 
-    if (
-        check_ok := _run_health_check_runner_processes_running(ssh_conn, instance.server_name)
-    ) is not None:
-        return check_ok
+        if (
+            check_ok := _run_health_check_cloud_init(
+                ssh_conn, instance.server_name, accept_finished_job
+            )
+        ) is not None:
+            return check_ok
+
+        if (
+            check_ok := _run_health_check_runner_processes_running(ssh_conn, instance.server_name)
+        ) is not None:
+            return check_ok
+    except _SSHError as exc:
+        raise OpenstackHealthCheckError(
+            "Health check execution failed due to SSH command failure."
+        ) from exc
 
     return True
 
@@ -101,7 +121,7 @@ def _get_ssh_connection(
         instance: The OpenStack instance to conduit the health check.
 
     Raises:
-        SSHError: Unable to get a SSH connection to the instance.
+        _SSHError: Unable to get a SSH connection to the instance.
 
     Returns:
         Whether the runner is healthy.
@@ -109,11 +129,8 @@ def _get_ssh_connection(
     try:
         ssh_conn = openstack_cloud.get_ssh_connection(instance)
 
-    except SSHError:
-        logger.exception(
-            "SSH connection failure with %s during health check", instance.server_name
-        )
-        raise
+    except SSHError as exc:
+        raise _SSHError(f"Unable to get SSH connection to {instance.server_name}") from exc
     return ssh_conn
 
 
@@ -170,7 +187,7 @@ def _run_health_check_cloud_init(
     Returns:
         Whether the cloud-init status indicates the run is healthy or None.
     """
-    result: invoke.runners.Result = ssh_conn.run("cloud-init status", warn=True, timeout=30)
+    result: invoke.runners.Result = _execute_ssh_command(ssh_conn, "cloud-init status")
     if not result.ok:
         logger.warning("cloud-init status command failed on %s: %s.", server_name, result.stderr)
         return False
@@ -206,8 +223,8 @@ def _run_health_check_runner_installed(
         If the run can be considered healthy depending on the existence of
         the runner-installed.timestamp.
     """
-    result = ssh_conn.run(
-        f"[ -f {METRICS_EXCHANGE_PATH}/runner-installed.timestamp ]", warn=True, timeout=30
+    result = _execute_ssh_command(
+        ssh_conn, f"[ -f {METRICS_EXCHANGE_PATH}/runner-installed.timestamp ]"
     )
     if not result.ok:
         logger.info(
@@ -239,7 +256,7 @@ def _run_health_check_runner_processes_running(
     Returns:
         If the run can be considered healthy depending on the existence of the processes.
     """
-    result = ssh_conn.run("ps aux", warn=True, timeout=30)
+    result = _execute_ssh_command(ssh_conn, "ps aux")
     if not result.ok:
         logger.warning("SSH run of `ps aux` failed on %s: %s", server_name, result.stderr)
         return False
@@ -247,3 +264,24 @@ def _run_health_check_runner_processes_running(
         logger.warning("Runner process not found on %s", server_name)
         return False
     return None
+
+
+def _execute_ssh_command(ssh_conn: SSHConnection, command: str) -> invoke.runners.Result:
+    """Run a command on the remote server with a 30 second timeout.
+
+    Args:
+        ssh_conn: The SSH connection to the runner.
+        command: The command to run.
+
+    Returns:
+        The result of the command (run with warn=True); callers check result.ok.
+
+    Raises:
+        _SSHError: If the command did not complete within the timeout.
+    """
+    try:
+        return ssh_conn.run(command, warn=True, timeout=30)
+    except invoke.exceptions.CommandTimedOut as exc:
+        raise _SSHError(
+            f"SSH command execution timed out for command '{command}' on {ssh_conn.host}"
+        ) from exc
diff --git a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py
index 37a3f4fad..964d98ee5 100644
--- a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py
+++ b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py
@@ -23,6 +23,7 @@
     KeyfileError,
     MissingServerConfigError,
     OpenStackError,
+    OpenstackHealthCheckError,
     RunnerCreateError,
     RunnerStartError,
     SSHError,
@@ -70,6 +71,8 @@
 
 OUTDATED_METRICS_STORAGE_IN_SECONDS = CREATE_SERVER_TIMEOUT + 30  # add a bit on top of the timeout
 
+HEALTH_CHECK_ERROR_LOG_MSG = "Health check could not be completed for %s"
+
 
 class _GithubRunnerRemoveError(Exception):
     """Represents an error while SSH into a runner and running the remove script."""
@@ -124,10 +127,12 @@ class _RunnerHealth:
     Attributes:
         healthy: The list of healthy runners.
         unhealthy:  The list of unhealthy runners.
+        unknown: The list of runners whose health state could not be determined.
     """
 
     healthy: tuple[OpenstackInstance, ...]
     unhealthy: tuple[OpenstackInstance, ...]
+    unknown: tuple[OpenstackInstance, ...]
 
 
 class OpenStackRunnerManager(CloudRunnerManager):
@@ -233,15 +238,20 @@ def get_runner(self, instance_id: InstanceId) -> CloudRunnerInstance | None:
         logger.debug(
             "Runner info fetched, checking health %s %s", instance_id, instance.server_name
         )
-        healthy = health_checks.check_runner(
-            openstack_cloud=self._openstack_cloud, instance=instance
-        )
-        logger.debug("Runner health check completed %s %s", instance.server_name, healthy)
+
+        try:
+            healthy = health_checks.check_runner(
+                openstack_cloud=self._openstack_cloud, instance=instance
+            )
+            logger.debug("Runner health check completed %s %s", instance.server_name, healthy)
+        except OpenstackHealthCheckError:
+            logger.exception(HEALTH_CHECK_ERROR_LOG_MSG, instance.server_name)
+            healthy = None
         return (
             CloudRunnerInstance(
                 name=instance.server_name,
                 instance_id=instance_id,
-                health=HealthState.HEALTHY if healthy else HealthState.UNHEALTHY,
+                health=HealthState.from_value(healthy),
                 state=CloudRunnerState.from_openstack_server_status(instance.status),
             )
             if instance is not None
@@ -260,27 +270,29 @@ def get_runners(
         Returns:
             Information on the runner instances.
         """
-        instance_list = self._openstack_cloud.get_instances()
-        instance_list = [
-            CloudRunnerInstance(
-                name=instance.server_name,
-                instance_id=instance.instance_id,
-                health=(
-                    HealthState.HEALTHY
-                    if health_checks.check_runner(
-                        openstack_cloud=self._openstack_cloud, instance=instance
-                    )
-                    else HealthState.UNHEALTHY
-                ),
-                state=CloudRunnerState.from_openstack_server_status(instance.status),
+        instances = self._openstack_cloud.get_instances()
+        runners = []
+        for instance in instances:
+            try:
+                healthy = health_checks.check_runner(
+                    openstack_cloud=self._openstack_cloud, instance=instance
+                )
+            except OpenstackHealthCheckError:
+                logger.exception(HEALTH_CHECK_ERROR_LOG_MSG, instance.server_name)
+                healthy = None
+            runners.append(
+                CloudRunnerInstance(
+                    name=instance.server_name,
+                    instance_id=instance.instance_id,
+                    health=HealthState.from_value(healthy),
+                    state=CloudRunnerState.from_openstack_server_status(instance.status),
+                )
             )
-            for instance in instance_list
-        ]
         if states is None:
-            return tuple(instance_list)
+            return tuple(runners)
 
         state_set = set(states)
-        return tuple(instance for instance in instance_list if instance.state in state_set)
+        return tuple(runner for runner in runners if runner.state in state_set)
 
     def delete_runner(
         self, instance_id: InstanceId, remove_token: str
@@ -361,8 +373,10 @@ def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
 
         healthy_runner_names = {runner.server_name for runner in runners.healthy}
         unhealthy_runner_names = {runner.server_name for runner in runners.unhealthy}
+        unknown_runner_names = {runner.server_name for runner in runners.unknown}
         logger.debug("Healthy runners: %s", healthy_runner_names)
         logger.debug("Unhealthy runners: %s", unhealthy_runner_names)
+        logger.debug("Unknown health runners: %s", unknown_runner_names)
 
         logger.debug("Deleting unhealthy runners.")
         for runner in runners.unhealthy:
@@ -374,33 +388,31 @@ def cleanup(self, remove_token: str) -> Iterator[runner_metrics.RunnerMetrics]:
         logger.debug("Extracting metrics.")
         return self._cleanup_extract_metrics(
             metrics_storage_manager=self._metrics_storage_manager,
-            healthy_runner_names=healthy_runner_names,
-            unhealthy_runner_names=unhealthy_runner_names,
+            ignore_runner_names=healthy_runner_names | unknown_runner_names,
+            include_runner_names=unhealthy_runner_names,
         )
 
     @staticmethod
     def _cleanup_extract_metrics(
         metrics_storage_manager: StorageManager,
-        healthy_runner_names: set[str],
-        unhealthy_runner_names: set[str],
+        ignore_runner_names: set[str],
+        include_runner_names: set[str],
     ) -> Iterator[runner_metrics.RunnerMetrics]:
-        """Extract metrics for unhealthy runners and dangling metrics storage.
+        """Extract metrics for certain runners and dangling metrics storage.
 
         Args:
             metrics_storage_manager: The metrics storage manager.
-            healthy_runner_names: The names of healthy runners.
-            unhealthy_runner_names: The names of unhealthy runners.
+            ignore_runner_names: The names of the runners whose metrics should not be extracted.
+            include_runner_names: The names of the runners whose metrics should be extracted.
 
         Returns:
-            Any metrics retrieved from unhealthy runners and dangling storage.
+            Any metrics retrieved from the include_runner_names and dangling storage.
         """
-        # We want to extract metrics for unhealthy runners(runners to clean up).
-        # But there may be runners under construction
-        # (not marked as healthy and unhealthy because they do not yet exist in OpenStack)
-        # that should not be cleaned up.
+        # There may be runners under construction that are not included in the runner_names sets
+        # because they do not yet exist in OpenStack and that should not be cleaned up.
         # On the other hand, there could be storage for runners from the past that
         # should be cleaned up.
-        all_runner_names = healthy_runner_names | unhealthy_runner_names
+        all_runner_names = ignore_runner_names | include_runner_names
         unmatched_metrics_storage = (
             ms
             for ms in metrics_storage_manager.list_all()
@@ -414,7 +426,7 @@ def _cleanup_extract_metrics(
         }
         return runner_metrics.extract(
             metrics_storage_manager=metrics_storage_manager,
-            runners=unhealthy_runner_names | dangling_storage_runner_names,
+            runners=include_runner_names | dangling_storage_runner_names,
             include=True,
         )
 
@@ -462,13 +474,21 @@ def _get_runners_health(self) -> _RunnerHealth:
         """
         runner_list = self._openstack_cloud.get_instances()
 
-        healthy, unhealthy = [], []
+        healthy, unhealthy, unknown = [], [], []
         for runner in runner_list:
-            if health_checks.check_runner(openstack_cloud=self._openstack_cloud, instance=runner):
-                healthy.append(runner)
-            else:
-                unhealthy.append(runner)
-        return _RunnerHealth(healthy=tuple(healthy), unhealthy=tuple(unhealthy))
+            try:
+                if health_checks.check_runner(
+                    openstack_cloud=self._openstack_cloud, instance=runner
+                ):
+                    healthy.append(runner)
+                else:
+                    unhealthy.append(runner)
+            except OpenstackHealthCheckError:
+                logger.exception(HEALTH_CHECK_ERROR_LOG_MSG, runner.server_name)
+                unknown.append(runner)
+        return _RunnerHealth(
+            healthy=tuple(healthy), unhealthy=tuple(unhealthy), unknown=tuple(unknown)
+        )
 
     def _generate_cloud_init(self, instance_name: str, registration_token: str) -> str:
         """Generate cloud init userdata.
@@ -668,15 +688,21 @@ def _wait_runner_running(self, instance: OpenstackInstance) -> None:
                 f"Failed to SSH connect to {instance.server_name} openstack runner"
             ) from err
 
-        if not health_checks.check_active_runner(
-            ssh_conn=ssh_conn, instance=instance, accept_finished_job=True
-        ):
-            logger.info("Runner process not found on %s", instance.server_name)
+        try:
+            healthy = health_checks.check_active_runner(
+                ssh_conn=ssh_conn, instance=instance, accept_finished_job=True
+            )
+        except OpenstackHealthCheckError as exc:
+            raise RunnerStartError(
+                f"Failed to check health of runner process on {instance.server_name}"
+            ) from exc
+        if not healthy:
+            logger.info("Runner %s not considered healthy", instance.server_name)
             raise RunnerStartError(
-                f"Runner process on {instance.server_name} failed to initialize on after starting"
+                f"Runner {instance.server_name} failed to initialize after starting"
             )
 
-        logger.info("Runner process found to be healthy on %s", instance.server_name)
+        logger.info("Runner %s found to be healthy", instance.server_name)
 
     @staticmethod
     def _generate_instance_id() -> InstanceId:
diff --git a/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py b/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py
index 6dff55ba8..1000ce8d7 100644
--- a/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py
+++ b/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py
@@ -2,19 +2,66 @@
 # See LICENSE file for licensing details.
 
 """Module for unit-testing OpenStack runner manager."""
+import secrets
 from datetime import datetime, timedelta
 from pathlib import Path
 from unittest.mock import MagicMock
 
 import pytest
 
+from github_runner_manager.errors import OpenstackHealthCheckError
+from github_runner_manager.manager.cloud_runner_manager import SupportServiceConfig
 from github_runner_manager.metrics import runner
 from github_runner_manager.metrics.storage import MetricsStorage, StorageManager
-from github_runner_manager.openstack_cloud import openstack_runner_manager
+from github_runner_manager.openstack_cloud import (
+    health_checks,
+    openstack_cloud,
+    openstack_runner_manager,
+)
+from github_runner_manager.openstack_cloud.openstack_cloud import OpenstackCloud
 from github_runner_manager.openstack_cloud.openstack_runner_manager import (
     OUTDATED_METRICS_STORAGE_IN_SECONDS,
     OpenStackRunnerManager,
+    OpenStackRunnerManagerConfig,
 )
+from tests.unit.factories import openstack_factory
+
+OPENSTACK_INSTANCE_PREFIX = "test"
+
+
+@pytest.fixture(name="runner_manager")
+def openstack_runner_manager_fixture(monkeypatch: pytest.MonkeyPatch) -> OpenStackRunnerManager:
+    """Return an OpenStackRunnerManager with mocked config, metrics storage and OpenstackCloud."""
+    monkeypatch.setattr(
+        "github_runner_manager.openstack_cloud.openstack_runner_manager.metrics_storage",
+        MagicMock(),
+    )
+    monkeypatch.setattr(
+        "github_runner_manager.openstack_cloud.openstack_runner_manager.OpenstackCloud",
+        MagicMock(),
+    )
+
+    service_config_mock = MagicMock(spec=SupportServiceConfig)
+    service_config_mock.proxy_config = None
+    config = OpenStackRunnerManagerConfig(
+        name="test",
+        prefix="test",
+        credentials=MagicMock(),
+        server_config=MagicMock(),
+        runner_config=MagicMock(),
+        service_config=service_config_mock,
+        system_user_config=MagicMock(),
+    )
+
+    return OpenStackRunnerManager(config=config)
+
+
+@pytest.fixture(name="runner_metrics_mock")
+def runner_metrics_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
+    """Replace openstack_runner_manager.runner_metrics with a mock and return the mock."""
+    runner_metrics_mock = MagicMock(spec=runner)
+    monkeypatch.setattr(openstack_runner_manager, "runner_metrics", runner_metrics_mock)
+    return runner_metrics_mock
 
 
 @pytest.mark.parametrize(
@@ -87,6 +134,7 @@ def test__cleanup_extract_metrics(
     undecided_runner_storage: set[tuple[str, datetime]],
     expected_storage_to_be_extracted: set[str],
     monkeypatch: pytest.MonkeyPatch,
+    runner_metrics_mock: MagicMock,
 ):
     """
     arrange: Given different combinations of healthy, unhealthy and undecided runners.
@@ -94,8 +142,6 @@ def test__cleanup_extract_metrics(
     assert: runner_metrics.extract is called with the expected storage to be extracted.
     """
     metric_storage_manager = MagicMock(spec=StorageManager)
-    runner_metrics_mock = MagicMock(spec=runner)
-    monkeypatch.setattr(openstack_runner_manager, "runner_metrics", runner_metrics_mock)
     now = datetime.now()
     all_runner_name_metrics_storage = [
         _create_metrics_storage(runner_name, now)
@@ -111,14 +157,69 @@ def test__cleanup_extract_metrics(
 
     OpenStackRunnerManager._cleanup_extract_metrics(
         metrics_storage_manager=metric_storage_manager,
-        healthy_runner_names=healthy_runner_names,
-        unhealthy_runner_names=unhealthy_runner_names,
+        ignore_runner_names=healthy_runner_names,
+        include_runner_names=unhealthy_runner_names,
     )
 
     assert runner_metrics_mock.extract.call_count == 1
     assert runner_metrics_mock.extract.call_args[1]["runners"] == expected_storage_to_be_extracted
 
 
+@pytest.mark.parametrize(
+    "healthy_count, unhealthy_count, unknown_count",
+    [
+        pytest.param(1, 1, 1, id="one of each"),
+        pytest.param(2, 1, 1, id="two healthy"),
+        pytest.param(1, 2, 1, id="two unhealthy"),
+        pytest.param(1, 1, 2, id="two unknown"),
+        pytest.param(0, 0, 0, id="no runners"),
+        pytest.param(0, 0, 1, id="one unknown"),
+        pytest.param(0, 1, 0, id="one unhealthy"),
+        pytest.param(1, 0, 0, id="one healthy"),
+    ],
+)
+def test_cleanup_ignores_runners_with_health_check_errors(
+    healthy_count: int,
+    unhealthy_count: int,
+    unknown_count: int,
+    monkeypatch: pytest.MonkeyPatch,
+    runner_manager: OpenStackRunnerManager,
+    runner_metrics_mock: MagicMock,
+):
+    """
+    arrange: Given a combination of healthy/unhealthy/unknown(with a health check error) runners.
+    act: When the cleanup method is called.
+    assert: Only the unhealthy runners are deleted and their metrics are extracted.
+    """
+    names = [
+        f"{OPENSTACK_INSTANCE_PREFIX}-{status}{i}"
+        for status, count in [
+            ("healthy", healthy_count),
+            ("unhealthy", unhealthy_count),
+            ("unknown", unknown_count),
+        ]
+        for i in range(count)
+    ]
+    openstack_cloud_mock = _create_openstack_cloud_mock(names)
+    runner_manager._openstack_cloud = openstack_cloud_mock
+    health_checks_mock = _create_health_checks_mock()
+    monkeypatch.setattr(
+        "github_runner_manager.openstack_cloud.openstack_runner_manager.health_checks",
+        health_checks_mock,
+    )
+    runner_manager.cleanup(secrets.token_hex(16))
+
+    assert openstack_cloud_mock.delete_instance.call_count == unhealthy_count
+    for name in names:
+        instance_id = name[len(OPENSTACK_INSTANCE_PREFIX) + 1 :]
+        if instance_id.startswith("unhealthy"):
+            openstack_cloud_mock.delete_instance.assert_any_call(instance_id)
+    assert runner_metrics_mock.extract.call_count == 1
+    assert runner_metrics_mock.extract.call_args[1]["runners"] == {
+        name for name in names if name.startswith(f"{OPENSTACK_INSTANCE_PREFIX}-unhealthy")
+    }
+
+
 def _create_metrics_storage(runner_name: str, mtime: datetime) -> MetricsStorage:
     """
     Create a metric storage object with a mocked mtime for the storage path.
@@ -136,3 +237,42 @@ def _create_metrics_storage(runner_name: str, mtime: datetime) -> MetricsStorage
     stat.st_mtime = mtime.timestamp()
     metrics_storage.path.stat = stat_mock
     return metrics_storage
+
+
+def _create_openstack_cloud_mock(server_names: list[str]) -> MagicMock:
+    """Create an OpenstackCloud mock whose get_instances returns an instance per server name."""
+    openstack_cloud_mock = MagicMock(spec=OpenstackCloud)
+    openstack_cloud_mock.get_instances.return_value = [
+        openstack_cloud.OpenstackInstance(
+            server=openstack_factory.ServerFactory(
+                status="ACTIVE",
+                name=name,
+            ),
+            prefix=OPENSTACK_INSTANCE_PREFIX,
+        )
+        for name in server_names
+    ]
+    return openstack_cloud_mock
+
+
+def _create_health_checks_mock() -> MagicMock:
+    """Create a health_checks module mock whose check_runner returns a boolean or raises an error.
+
+    check_runner returns True if the server name starts with "test-healthy", False if it
+    starts with "test-unhealthy", and raises OpenstackHealthCheckError otherwise.
+    """
+    health_checks_mock = MagicMock(spec=health_checks)
+
+    def _health_checks_side_effect(openstack_cloud, instance):
+        """Mock side effect for the health_checks.check_runner method.
+
+        The outcome is selected by the prefix of the instance's server name.
+        """
+        if instance.server_name.startswith("test-healthy"):
+            return True
+        if instance.server_name.startswith("test-unhealthy"):
+            return False
+        raise OpenstackHealthCheckError("Health check failed")
+
+    health_checks_mock.check_runner.side_effect = _health_checks_side_effect
+    return health_checks_mock