Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Refinement] refine error handling, error report on status API now covers boot controller startup failure #259

Merged
merged 27 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e17b3e4
otaclient: implement new OTAServicer to replace OTAClientWrapper
Bodong-Yang Nov 1, 2023
3863345
ota_service_stub: adapt according to otaclient.OTAServicer
Bodong-Yang Nov 1, 2023
ab8a2a9
errors: redefine OTA errors
Bodong-Yang Nov 1, 2023
f70df69
grub: adjust according to errors' change
Bodong-Yang Nov 1, 2023
fe5e5a4
cboot: adjust according to errors' change
Bodong-Yang Nov 1, 2023
d3b220a
rpi_boot: adjust according to errors' change
Bodong-Yang Nov 1, 2023
37d3bd0
grub: adjust again according to errors' change
Bodong-Yang Nov 1, 2023
4429288
cboot: adjust again according to errors' change
Bodong-Yang Nov 1, 2023
da0b160
otaclient: adjust according to ota_errors' change
Bodong-Yang Nov 2, 2023
3dd6e11
errors: define OTAClientStartupFailed error
Bodong-Yang Nov 2, 2023
c9595be
otaclient.OTAServicer: re-add missing is_busy and local_used_proxy at…
Bodong-Yang Nov 2, 2023
a4aed9d
test_otaclient_stub: update accordingly to make it work again
Bodong-Yang Nov 2, 2023
073706e
test_otaclient: update accordingly to make it work again
Bodong-Yang Nov 2, 2023
31a2500
errors: provide an error report generation API
Bodong-Yang Nov 2, 2023
da31b12
otaclient: fix error logging not properly formatted
Bodong-Yang Nov 2, 2023
36c1ae3
test_otaclient: update accordingly to make it work again
Bodong-Yang Nov 2, 2023
62cf339
otaclient@L262: properly capture downloading group failure
Bodong-Yang Nov 2, 2023
5de37ad
otaclient: on_failure uses OTAError.get_error_report API
Bodong-Yang Nov 2, 2023
9fd339b
errors: get_failure_reason now only returns failure_code + failure_de…
Bodong-Yang Nov 2, 2023
b72be80
errors: get_error_report has new format, get_failure_traceback now by…
Bodong-Yang Nov 2, 2023
bb91ae0
errors: refine the failure descriptions
Bodong-Yang Nov 2, 2023
05a64be
configs: new config DEBUG_MODE(default is False)
Bodong-Yang Nov 2, 2023
a788220
otaclient: only include traceback info in status API resp when DEBUG_…
Bodong-Yang Nov 2, 2023
9d2d0de
rpi_boot: minor fix to make linter happy
Bodong-Yang Nov 2, 2023
a912a83
boot_controllers: do not directly import errors from app.errors
Bodong-Yang Nov 2, 2023
9f1b41f
errors: fix get_error_report missing module name info
Bodong-Yang Nov 2, 2023
16cc689
errors: rename OTAErrorUnRecoverable to OTAErrorUnrecoverable
Bodong-Yang Nov 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 81 additions & 76 deletions otaclient/app/boot_control/_cboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from typing import Generator, Optional


from .. import log_setting
from .. import log_setting, errors as ota_errors
from ..common import (
copytree_identical,
read_str_from_file,
Expand All @@ -30,17 +30,8 @@
write_str_to_file_sync,
)

from ..errors import (
BootControlInitError,
BootControlPlatformUnsupported,
BootControlPostRollbackFailed,
BootControlPostUpdateFailed,
BootControlPreRollbackFailed,
BootControlPreUpdateFailed,
)
from ..proto import wrapper

from . import _errors
from ._common import (
OTAStatusMixin,
PrepareMountMixin,
Expand All @@ -58,7 +49,7 @@
)


class NvbootctrlError(_errors.BootControlError):
class NvbootctrlError(Exception):
"""Specific internal errors related to nvbootctrl cmd."""


Expand Down Expand Up @@ -167,26 +158,21 @@ def is_slot_marked_successful(cls, slot: str) -> bool:

class _CBootControl:
def __init__(self):
try:
# NOTE: only support rqx-580, rqx-58g platform right now!
# detect the chip id
self.chip_id = read_str_from_file(cfg.TEGRA_CHIP_ID_PATH)
if not self.chip_id or int(self.chip_id) not in cfg.CHIP_ID_MODEL_MAP:
raise NotImplementedError(
f"unsupported platform found (chip_id: {self.chip_id}), abort"
)
# NOTE: only support rqx-580, rqx-58g platform right now!
# detect the chip id
self.chip_id = read_str_from_file(cfg.TEGRA_CHIP_ID_PATH)
if not self.chip_id or int(self.chip_id) not in cfg.CHIP_ID_MODEL_MAP:
raise NotImplementedError(
f"unsupported platform found (chip_id: {self.chip_id}), abort"
)

self.chip_id = int(self.chip_id)
self.model = cfg.CHIP_ID_MODEL_MAP[self.chip_id]
logger.info(f"{self.model=}, (chip_id={hex(self.chip_id)})")
self.chip_id = int(self.chip_id)
self.model = cfg.CHIP_ID_MODEL_MAP[self.chip_id]
logger.info(f"{self.model=}, (chip_id={hex(self.chip_id)})")

# initializing dev info
self._init_dev_info()
logger.info(f"finished cboot control init: {Nvbootctrl.dump_slots_info()=}")
except NotImplementedError as e:
raise BootControlPlatformUnsupported from e
except Exception as e:
raise BootControlInitError from e
# initializing dev info
self._init_dev_info()
logger.info(f"finished cboot control init: {Nvbootctrl.dump_slots_info()=}")

def _init_dev_info(self):
self.current_slot: str = Nvbootctrl.get_current_slot()
Expand Down Expand Up @@ -221,12 +207,12 @@ def _init_dev_info(self):
# ensure rootfs is as expected
if not Nvbootctrl.check_rootdev(self.current_rootfs_dev):
msg = f"rootfs mismatch, expect {self.current_rootfs_dev} as rootfs"
raise ValueError(msg)
raise NvbootctrlError(msg)
elif Nvbootctrl.check_rootdev(self.standby_rootfs_dev):
msg = (
f"rootfs mismatch, expect {self.standby_rootfs_dev} as standby slot dev"
)
raise ValueError(msg)
raise NvbootctrlError(msg)

logger.info("dev info initializing completed")
logger.info(
Expand Down Expand Up @@ -311,38 +297,45 @@ class CBootController(
BootControllerProtocol,
):
def __init__(self) -> None:
self._cboot_control: _CBootControl = _CBootControl()

# load paths
## first try to unmount standby dev if possible
self.standby_slot_dev = self._cboot_control.get_standby_rootfs_dev()
CMDHelperFuncs.umount(self.standby_slot_dev)

self.standby_slot_mount_point = Path(cfg.MOUNT_POINT)
self.standby_slot_mount_point.mkdir(exist_ok=True)

## refroot mount point
_refroot_mount_point = cfg.ACTIVE_ROOT_MOUNT_POINT
# first try to umount refroot mount point
CMDHelperFuncs.umount(_refroot_mount_point)
if not os.path.isdir(_refroot_mount_point):
os.mkdir(_refroot_mount_point)
self.ref_slot_mount_point = Path(_refroot_mount_point)

## ota-status dir
### current slot
self.current_ota_status_dir = Path(cfg.ACTIVE_ROOTFS_PATH) / Path(
cfg.OTA_STATUS_DIR
).relative_to("/")
self.current_ota_status_dir.mkdir(parents=True, exist_ok=True)
### standby slot
# NOTE: might not yet be populated before OTA update applied!
self.standby_ota_status_dir = self.standby_slot_mount_point / Path(
cfg.OTA_STATUS_DIR
).relative_to("/")

# init ota-status
self._init_boot_control()
try:
self._cboot_control: _CBootControl = _CBootControl()

# load paths
## first try to unmount standby dev if possible
self.standby_slot_dev = self._cboot_control.get_standby_rootfs_dev()
CMDHelperFuncs.umount(self.standby_slot_dev)

self.standby_slot_mount_point = Path(cfg.MOUNT_POINT)
self.standby_slot_mount_point.mkdir(exist_ok=True)

## refroot mount point
_refroot_mount_point = cfg.ACTIVE_ROOT_MOUNT_POINT
# first try to umount refroot mount point
CMDHelperFuncs.umount(_refroot_mount_point)
if not os.path.isdir(_refroot_mount_point):
os.mkdir(_refroot_mount_point)
self.ref_slot_mount_point = Path(_refroot_mount_point)

## ota-status dir
### current slot
self.current_ota_status_dir = Path(cfg.ACTIVE_ROOTFS_PATH) / Path(
cfg.OTA_STATUS_DIR
).relative_to("/")
self.current_ota_status_dir.mkdir(parents=True, exist_ok=True)
### standby slot
# NOTE: might not yet be populated before OTA update applied!
self.standby_ota_status_dir = self.standby_slot_mount_point / Path(
cfg.OTA_STATUS_DIR
).relative_to("/")

# init ota-status
self._init_boot_control()
except NotImplementedError as e:
raise ota_errors.BootControlPlatformUnsupported(module=__name__) from e
except Exception as e:
raise ota_errors.BootControlStartupFailed(
f"unspecific boot controller startup failure: {e!r}", module=__name__
) from e

###### private methods ######

Expand Down Expand Up @@ -433,10 +426,10 @@ def _populate_boot_folder_to_separate_bootdev(self):
self._cboot_control.get_standby_boot_dev(),
_boot_dir_mount_point,
)
except _errors.MountError as e:
except Exception as e:
_msg = f"failed to mount standby boot dev: {e!r}"
logger.error(_msg)
raise _errors.BootControlError(_msg) from e
raise NvbootctrlError(_msg) from e

try:
dst = _boot_dir_mount_point / "boot"
Expand All @@ -448,14 +441,14 @@ def _populate_boot_folder_to_separate_bootdev(self):
except Exception as e:
_msg = f"failed to populate boot folder to separate bootdev: {e!r}"
logger.error(_msg)
raise _errors.BootControlError(_msg) from e
raise NvbootctrlError(_msg) from e
finally:
# unmount standby emmc boot dev on finish/failure
try:
CMDHelperFuncs.umount(_boot_dir_mount_point)
except _errors.MountError as e:
except Exception as e:
_failure_msg = f"failed to umount boot dev: {e!r}"
logger.error(_failure_msg)
logger.warning(_failure_msg)
# no need to raise to the caller

###### public methods ######
Expand Down Expand Up @@ -511,8 +504,11 @@ def pre_update(self, version: str, *, standby_as_ref: bool, erase_standby=False)

logger.info("pre-update setting finished")
except Exception as e:
logger.exception(f"failed on pre_update: {e!r}")
raise BootControlPreUpdateFailed from e
_err_msg = f"failed on pre_update: {e!r}"
logger.exception(_err_msg)
raise ota_errors.BootControlPreUpdateFailed(
f"{e!r}", module=__name__
) from e

def post_update(self) -> Generator[None, None, None]:
try:
Expand Down Expand Up @@ -550,8 +546,11 @@ def post_update(self) -> Generator[None, None, None]:
yield # hand over control back to otaclient
CMDHelperFuncs.reboot()
except Exception as e:
logger.exception(f"failed on post_update: {e!r}")
raise BootControlPostUpdateFailed from e
_err_msg = f"failed on post_update: {e!r}"
logger.exception(_err_msg)
raise ota_errors.BootControlPostUpdateFailed(
_err_msg, module=__name__
) from e

def pre_rollback(self):
try:
Expand All @@ -563,13 +562,19 @@ def pre_rollback(self):
# store ROLLBACKING status to standby
self._store_standby_ota_status(wrapper.StatusOta.ROLLBACKING)
except Exception as e:
logger.exception(f"failed on pre_rollback: {e!r}")
raise BootControlPreRollbackFailed from e
_err_msg = f"failed on pre_rollback: {e!r}"
logger.exception(_err_msg)
raise ota_errors.BootControlPreRollbackFailed(
_err_msg, module=__name__
) from e

def post_rollback(self):
try:
self._cboot_control.switch_boot()
CMDHelperFuncs.reboot()
except Exception as e:
logger.exception(f"failed on post_rollback: {e!r}")
raise BootControlPostRollbackFailed from e
_err_msg = f"failed on post_rollback: {e!r}"
logger.exception(_err_msg)
raise ota_errors.BootControlPostRollbackFailed(
_err_msg, module=__name__
) from e
Loading
Loading