diff --git a/src-docs/lxd.py.md b/src-docs/lxd.py.md index 54da17e03..2b7b384aa 100644 --- a/src-docs/lxd.py.md +++ b/src-docs/lxd.py.md @@ -14,7 +14,7 @@ The LxdClient class offers a low-level interface to isolate the underlying imple ## class `LxdClient` LXD client. - + ### function `__init__` @@ -415,7 +415,7 @@ Create an LXD instance. ## class `LxdNetworkManager` LXD network manager. - + ### function `__init__` @@ -436,7 +436,7 @@ Instantiate the LXD profile manager. --- - + ### function `get` @@ -458,6 +458,55 @@ Get the LXD network information. Information on the LXD network. +--- + +## class `LxdProfile` +LXD profile. + + + +### function `__init__` + +```python +__init__(pylxd_profile: 'Profile') +``` + +Instantiate the LXD profile. + + + +**Args:** + + - `pylxd_profile`: Instance of the pylxd.models.Profile. + + + + +--- + + + +### function `delete` + +```python +delete() +``` + +Delete the profile. + +--- + + + +### function `save` + +```python +save() +``` + +Save the current configuration of profile. + + --- ## class `LxdProfileManager` @@ -540,6 +589,30 @@ Check whether an LXD profile of a given name exists. **Returns:** Whether the LXD profile of the given name exists. +--- + + + +### function `get` + +```python +get(name: 'str') → LxdProfile +``` + +Get an LXD profile. + + + +**Args:** + + - `name`: Name of the LXD profile. + + + +**Raises:** + + - `LxdError`: Unable to get the LXD profile with the name. + --- @@ -548,7 +621,7 @@ An LXD storage pool. Attrs: name (str): Name of the storage pool. driver (str): Type of driver of the storage pool. used_by (list[str]): LXD instances using the storage pool. config (dict[str, any]): Dictionary of the configuration of the storage pool. managed (bool): Whether LXD manages the storage pool. - + ### function `__init__` @@ -569,7 +642,7 @@ Instantiate the LXD storage pool. --- - + ### function `delete` @@ -581,7 +654,7 @@ Delete the storage pool. 
def get(self, name: str) -> LxdProfile:
    """Get an LXD profile.

    Args:
        name: Name of the LXD profile.

    Returns:
        The LXD profile with the given name, wrapped in LxdProfile.

    Raises:
        LxdError: Unable to get the LXD profile with the name.
    """
    # Keep the try body limited to the pylxd call that can raise.
    try:
        pylxd_profile = self._pylxd_client.profiles.get(name)
    except pylxd.exceptions.LXDAPIException as err:
        logger.exception("Failed to get LXD profile")
        raise LxdError(f"Unable to get LXD profile {name}") from err

    # Wrap the raw pylxd model so callers receive the declared return type
    # instead of a pylxd object leaking through the abstraction.
    return LxdProfile(pylxd_profile)


class LxdProfile:
    """LXD profile.

    Thin wrapper around a pylxd profile object. The attributes below are
    copied from the wrapped object when the wrapper is created.

    Attrs:
        name: Name of the LXD profile.
        description: Description of the LXD profile.
        config: Configuration of the LXD profile.
        devices: Devices of the LXD profile.
        used_by: Value of `used_by` reported by the wrapped profile.
    """

    def __init__(
        self,
        pylxd_profile: pylxd.models.Profile,
    ):
        """Instantiate the LXD profile.

        Args:
            pylxd_profile: Instance of the pylxd.models.Profile.
        """
        self._pylxd_profile = pylxd_profile

        self.name = self._pylxd_profile.name
        self.description = self._pylxd_profile.description
        self.config = self._pylxd_profile.config
        self.devices = self._pylxd_profile.devices
        self.used_by = self._pylxd_profile.used_by

    def save(self):
        """Save the current configuration of profile."""
        # Push every editable attribute back to the wrapped object; copying
        # only `config` would silently drop changes made to `description`
        # or `devices` on this wrapper.
        self._pylxd_profile.config = self.config
        self._pylxd_profile.description = self.description
        self._pylxd_profile.devices = self.devices
        self._pylxd_profile.save()

    def delete(self):
        """Delete the profile."""
        self._pylxd_profile.delete()
def _remove_runner_storage_pool(self) -> None:
    """Remove the runner storage pool if exists.

    Profiles that use the storage pool are deleted before the pool itself,
    as they would otherwise keep the pool in use. Entries of the pool's
    `used_by` that do not refer to a profile are left alone and reported
    with a warning.
    """
    if not self._clients.lxd.storage_pools.exists("runner"):
        return

    logger.info("Removing existing runner LXD storage pool.")
    runner_storage_pool = self._clients.lxd.storage_pools.get("runner")

    # The resource profile needs to be removed first as it uses the storage pool.
    for used_by in runner_storage_pool.used_by:
        # Entries are treated as LXD API paths such as "/1.0/profiles/<name>".
        # Drop any query string (e.g. "?project=...") so it does not end up
        # in the resource name.
        path = used_by.split("?", 1)[0]
        collection, _, resource_name = path.rpartition("/")

        # Only profiles are removed here; looking up an instance or volume
        # name as a profile would fail and abort the clean-up.
        if not collection.endswith("/profiles"):
            logger.warning(
                "Runner LXD storage pool is used by a non-profile resource: %s", used_by
            )
            continue

        self._clients.lxd.profiles.get(resource_name).delete()

    runner_storage_pool.delete()
@pytest.mark.asyncio
@pytest.mark.abort_on_fail
async def test_reconcile_runners_with_lxd_storage_pool_failure(
    model: Model, app: Application
) -> None:
    """
    arrange: A working application with no runners.
    act:
        1.  a. Set virtual-machines config to 0.
            b. Run reconcile_runners action.
            c. Delete content in the runner LXD storage directory.
        2.  a. Set virtual-machines config to 1.
            b. Run reconcile_runners action.
    assert:
        1. No runner should exist.
        2. One runner should exist.
    """
    unit = app.units[0]

    async def reconcile_with(virtual_machines: str) -> None:
        """Set the virtual-machines config, run reconcile-runners and wait for idle."""
        await app.set_config({"virtual-machines": virtual_machines})
        reconcile_action = await unit.run_action("reconcile-runners")
        await reconcile_action.wait()
        await model.wait_for_idle(status=ACTIVE_STATUS_NAME)

    # 1. Scale down to no runners, then delete the content of the storage directory.
    await reconcile_with("0")
    await wait_till_num_of_runners(unit, 0)

    wipe_command = f"rm -rf {GithubRunnerCharm.ram_pool_path}/*"
    exit_code, _ = await run_in_unit(unit, wipe_command)
    assert exit_code == 0

    # 2. Ask for one runner again and expect it to come up.
    await reconcile_with("1")
    await wait_till_num_of_runners(unit, 1)