From b393ce2a48d38f576e08ca47c525a9a4f2e2d3cc Mon Sep 17 00:00:00 2001 From: Kavitha Ramalingam Date: Tue, 22 Oct 2024 19:43:49 +0530 Subject: [PATCH] Integrated Rebootbackend changes --- azure-pipelines.yml | 32 +- .../rebootbackend/gnoi_reboot.xml | 30 ++ .../rebootbackend/interfaces.cpp | 72 +++++ .../rebootbackend/interfaces.h | 27 ++ .../rebootbackend/reboot_common.cpp | 14 + .../rebootbackend/reboot_common.h | 20 ++ .../rebootbackend/reboot_interfaces.h | 21 ++ .../rebootbackend/reboot_thread.cpp | 264 ++++++++++++++++ .../rebootbackend/reboot_thread.h | 222 ++++++++++++++ .../rebootbackend/rebootbackend.cpp | 12 + .../rebootbackend/rebootbe.cpp | 284 ++++++++++++++++++ src/sonic-framework/rebootbackend/rebootbe.h | 93 ++++++ .../rebootbackend/redis_utils.cpp | 80 +++++ .../rebootbackend/redis_utils.h | 40 +++ 14 files changed, 1195 insertions(+), 16 deletions(-) create mode 100644 src/sonic-framework/rebootbackend/gnoi_reboot.xml create mode 100644 src/sonic-framework/rebootbackend/interfaces.cpp create mode 100644 src/sonic-framework/rebootbackend/interfaces.h create mode 100644 src/sonic-framework/rebootbackend/reboot_common.cpp create mode 100644 src/sonic-framework/rebootbackend/reboot_common.h create mode 100644 src/sonic-framework/rebootbackend/reboot_interfaces.h create mode 100644 src/sonic-framework/rebootbackend/reboot_thread.cpp create mode 100644 src/sonic-framework/rebootbackend/reboot_thread.h create mode 100644 src/sonic-framework/rebootbackend/rebootbackend.cpp create mode 100644 src/sonic-framework/rebootbackend/rebootbe.cpp create mode 100644 src/sonic-framework/rebootbackend/rebootbe.h create mode 100644 src/sonic-framework/rebootbackend/redis_utils.cpp create mode 100644 src/sonic-framework/rebootbackend/redis_utils.h diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b72aa967cc2e..3956a7c94448 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -295,22 +295,22 @@ stages: MGMT_BRANCH: $(BUILD_BRANCH) TEST_SET: 
onboarding_t1 - - job: onboarding_elastictest_dualtor - displayName: "onboarding dualtor testcases by Elastictest - optional" - timeoutInMinutes: 240 - continueOnError: true - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/run-test-elastictest-template.yml@sonic-mgmt - parameters: - TOPOLOGY: dualtor - STOP_ON_FAILURE: "False" - RETRY_TIMES: 0 - MIN_WORKER: $(T0_DUALTOR_INSTANCE_NUM) - MAX_WORKER: $(T0_DUALTOR_INSTANCE_NUM) - KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) - TEST_SET: onboarding_dualtor +# - job: onboarding_elastictest_dualtor +# displayName: "onboarding dualtor testcases by Elastictest - optional" +# timeoutInMinutes: 240 +# continueOnError: true +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/run-test-elastictest-template.yml@sonic-mgmt +# parameters: +# TOPOLOGY: dualtor +# STOP_ON_FAILURE: "False" +# RETRY_TIMES: 0 +# MIN_WORKER: $(T0_DUALTOR_INSTANCE_NUM) +# MAX_WORKER: $(T0_DUALTOR_INSTANCE_NUM) +# KVM_IMAGE_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: $(BUILD_BRANCH) +# TEST_SET: onboarding_dualtor # - job: wan_elastictest # displayName: "kvmtest-wan by Elastictest" diff --git a/src/sonic-framework/rebootbackend/gnoi_reboot.xml b/src/sonic-framework/rebootbackend/gnoi_reboot.xml new file mode 100644 index 000000000000..63af1963db4b --- /dev/null +++ b/src/sonic-framework/rebootbackend/gnoi_reboot.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/src/sonic-framework/rebootbackend/interfaces.cpp b/src/sonic-framework/rebootbackend/interfaces.cpp new file mode 100644 index 000000000000..5674a33358e1 --- /dev/null +++ b/src/sonic-framework/rebootbackend/interfaces.cpp @@ -0,0 +1,72 @@ +#include "interfaces.h" + +#include // DBus + +//#include "component_state_helper.h" +#include "reboot_interfaces.h" + +constexpr char kRebootBusName[] = "org.SONiC.HostService.gnoi_reboot"; +constexpr char kRebootPath[] = "/org/SONiC/HostService/gnoi_reboot"; + +constexpr char 
kContainerShutdownBusName[] = "org.SONiC.HostService.gnoi_container_shutdown";
+constexpr char kContainerShutdownPath[] = "/org/SONiC/HostService/gnoi_container_shutdown";
+
+// Returns the process-wide system-bus connection, creating it on first use.
+// Both objects are function-local statics: they are constructed exactly once
+// and that construction is thread-safe, which the previous hand-written
+// `connPtr == nullptr` check was not.
+DBus::Connection& HostServiceDbus::getConnection(void) {
+  static DBus::BusDispatcher dispatcher;
+  static DBus::Connection conn = [] {
+    // Keep the original order: install the dispatcher, then open the bus.
+    DBus::default_dispatcher = &dispatcher;
+    return DBus::Connection::SystemBus();
+  }();
+  return conn;
+}
+
+// Forwards a JSON-encoded reboot request to the gnoi_reboot host service.
+// Returns DBUS_SUCCESS with an empty string when the service reports
+// status 0, DBUS_FAIL with the service's message for any other status, and
+// DBUS_FAIL with a fixed message when the D-Bus call itself throws.
+DbusInterface::DbusResponse HostServiceDbus::Reboot(
+    const std::string& json_reboot_request) {
+  // Start from "failure" so a call that never writes the out-parameter
+  // cannot be mistaken for success.
+  int32_t status = 1;
+  std::string ret_string;
+  std::vector<std::string> options;
+  options.push_back(json_reboot_request);
+
+  GnoiDbusReboot reboot_client(getConnection(), kRebootBusName, kRebootPath);
+  try {
+    reboot_client.issue_reboot(options, status, ret_string);
+  } catch (const DBus::Error&) {
+    return DbusResponse{
+        DbusStatus::DBUS_FAIL,
+        "HostServiceDbus::Reboot: failed to call reboot host service"};
+  }
+
+  // gnoi_reboot.py returns 0 for success, 1 for failure
+  if (status == 0) {
+    // Successful reboot response is an empty string.
+    return DbusResponse{DbusStatus::DBUS_SUCCESS, ""};
+  }
+  return DbusResponse{DbusStatus::DBUS_FAIL, ret_string};
+}
+
+// Queries the gnoi_reboot host service for the reboot status.
+// json_status_request is accepted for interface symmetry but is not sent to
+// the service. The service's string is returned in both the success and the
+// failure case; only a thrown D-Bus error yields the fixed message.
+DbusInterface::DbusResponse HostServiceDbus::RebootStatus(
+    const std::string& json_status_request) {
+  // Start from "failure", see Reboot().
+  int32_t status = 1;
+  std::string ret_string;
+
+  GnoiDbusReboot reboot_client(getConnection(), kRebootBusName, kRebootPath);
+  try {
+    reboot_client.get_reboot_status(status, ret_string);
+  } catch (const DBus::Error&) {
+    return DbusResponse{
+        DbusStatus::DBUS_FAIL,
+        "HostServiceDbus::RebootStatus: failed to call reboot status "
+        "host service"};
+  }
+
+  // gnoi_reboot.py returns 0 for success, 1 for failure
+  if (status == 0) {
+    return DbusResponse{DbusStatus::DBUS_SUCCESS, ret_string};
+  }
+  return DbusResponse{DbusStatus::DBUS_FAIL, ret_string};
+}
+
diff --git a/src/sonic-framework/rebootbackend/interfaces.h b/src/sonic-framework/rebootbackend/interfaces.h
new file mode 100644
index 000000000000..977d3518ead0
--- /dev/null
+++ b/src/sonic-framework/rebootbackend/interfaces.h
@@ -0,0 +1,27 @@
+#pragma once
+#include <dbus-c++/dbus.h>
+
+#include <string>
+
+#include "gnoi_reboot_dbus.h"  // auto generated gnoi_reboot_proxy
+#include "reboot_interfaces.h"
+
+// D-Bus client proxy for the gnoi_reboot host service. It only binds the
+// generated proxy to a connection, an object path and a bus name.
+class GnoiDbusReboot : public org::SONiC::HostService::gnoi_reboot_proxy,
+                       public DBus::IntrospectableProxy,
+                       public DBus::ObjectProxy {
+ public:
+  GnoiDbusReboot(DBus::Connection& connection, const char* dbus_bus_name_p,
+                 const char* dbus_obj_name_p)
+      : DBus::ObjectProxy(connection, dbus_obj_name_p, dbus_bus_name_p) {}
+};
+
+// DbusInterface implementation that talks to the real host service.
+class HostServiceDbus : public DbusInterface {
+ public:
+  DbusInterface::DbusResponse Reboot(
+      const std::string& json_reboot_request) override;
+  DbusInterface::DbusResponse RebootStatus(
+      const std::string& json_status_request) override;
+
+ private:
+  // Shared system-bus connection, created on first use.
+  static DBus::Connection& getConnection(void);
+};
diff --git a/src/sonic-framework/rebootbackend/reboot_common.cpp b/src/sonic-framework/rebootbackend/reboot_common.cpp
new file mode 100644
index 000000000000..65672ae67841
---
/dev/null
+++ b/src/sonic-framework/rebootbackend/reboot_common.cpp
@@ -0,0 +1,14 @@
+#include "reboot_common.h"
+
+#include <time.h>
+
+namespace rebootbackend {
+
+// Converts a duration in milliseconds to a timespec: whole seconds go to
+// tv_sec and the remaining milliseconds, as nanoseconds, to tv_nsec.
+timespec milliseconds_to_timespec(uint64_t time_ms) {
+  timespec l_timespec{};
+  l_timespec.tv_sec = static_cast<time_t>(time_ms / ONE_THOUSAND);
+  // (ms remainder) * 1000 * 1000 is below 1e9, so it always fits tv_nsec.
+  l_timespec.tv_nsec =
+      static_cast<long>((time_ms % ONE_THOUSAND) * ONE_THOUSAND * ONE_THOUSAND);
+  return l_timespec;
+}
+
+}  // namespace rebootbackend
diff --git a/src/sonic-framework/rebootbackend/reboot_common.h b/src/sonic-framework/rebootbackend/reboot_common.h
new file mode 100644
index 000000000000..564b893232f7
--- /dev/null
+++ b/src/sonic-framework/rebootbackend/reboot_common.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <time.h>
+
+#include <cstdint>
+#include <string>
+
+#include "status_code_util.h"
+
+namespace rebootbackend {
+
+#define ONE_THOUSAND (1000)
+
+// Defined in rebootbe.cpp; read by the reboot thread before it starts work.
+extern bool sigterm_requested;
+
+// Converts a duration in milliseconds to a timespec.
+extern timespec milliseconds_to_timespec(uint64_t time_ms);
+
+// Result of handling one notification: a status code plus the string that
+// is sent back to the requester.
+struct NotificationResponse {
+  swss::StatusCode status;
+  std::string json_string;
+};
+
+}  // namespace rebootbackend
diff --git a/src/sonic-framework/rebootbackend/reboot_interfaces.h b/src/sonic-framework/rebootbackend/reboot_interfaces.h
new file mode 100644
index 000000000000..20f641e3f189
--- /dev/null
+++ b/src/sonic-framework/rebootbackend/reboot_interfaces.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <string>
+
+// Abstract access to the reboot host service, so the backend can be driven
+// by the real D-Bus client or by a substitute.
+class DbusInterface {
+ public:
+  enum class DbusStatus {
+    DBUS_SUCCESS,
+    DBUS_FAIL,
+  };
+
+  // Outcome of one host-service call plus the string it produced.
+  struct DbusResponse {
+    DbusStatus status;
+    std::string json_string;
+  };
+
+  virtual ~DbusInterface() = default;
+  virtual DbusResponse Reboot(const std::string& json_reboot_request) = 0;
+  virtual DbusResponse RebootStatus(const std::string& json_status_request) = 0;
+};
+
diff --git a/src/sonic-framework/rebootbackend/reboot_thread.cpp b/src/sonic-framework/rebootbackend/reboot_thread.cpp
new file mode 100644
index 000000000000..a57a241e302f
--- /dev/null
+++ b/src/sonic-framework/rebootbackend/reboot_thread.cpp
@@ -0,0 +1,264 @@
+#include "reboot_thread.h"
+#include <google/protobuf/util/json_util.h>
+#include <chrono>
+#include "dbconnector.h"
+#include "logger.h"
+#include "notificationproducer.h"
+#include "reboot_common.h"
+#include "reboot_interfaces.h"
+#include "redis_utils.h"
+#include "select.h"
+#include "selectableevent.h"
+#include "selectabletimer.h"
+#include "subscriberstatetable.h"
+#include "system/system.pb.h"
+#include "timestamp.h"
+
+namespace rebootbackend {
+
+using namespace ::gnoi::system;
+using steady_clock = std::chrono::steady_clock;
+using Progress = ::rebootbackend::RebootThread::Progress;
+//using WarmBootStage = ::swss::WarmStart::WarmBootStage;
+using WarmStartState = ::swss::WarmStart::WarmStartState;
+namespace gpu = ::google::protobuf::util;
+
+// dbus_interface: access to the reboot host service.
+// m_finished: event signalled when the reboot thread has finished.
+// The initialisers are listed in member declaration order.
+RebootThread::RebootThread(DbusInterface &dbus_interface,
+                           swss::SelectableEvent &m_finished)
+    : m_finished(m_finished),
+      m_dbus_interface(dbus_interface),
+      m_db("STATE_DB", 0) {}
+
+void RebootThread::Stop(void) {
+  SWSS_LOG_ENTER();
+  // Notify reboot thread that stop has been requested.
+  m_stop.notify();
+}
+
+// Joins the reboot thread and clears the active flag.
+// Returns true when a thread was joined, false otherwise.
+bool RebootThread::Join(void) {
+  SWSS_LOG_ENTER();
+
+  if (!m_thread.joinable()) {
+    SWSS_LOG_ERROR("RebootThread::Join called, but not joinable");
+    // No thread is running. Start() signals m_finished even when the thread
+    // could not be launched, so the active flag must be cleared here too;
+    // otherwise every later Start() is rejected as "in use".
+    m_status.set_inactive();
+    return false;
+  }
+
+  try {
+    m_thread.join();
+    m_status.set_inactive();
+    return true;
+  } catch (const std::system_error &e) {
+    SWSS_LOG_ERROR("Exception calling join: %s", e.what());
+    return false;
+  }
+}
+
+RebootStatusResponse RebootThread::GetResponse(void) {
+  return m_status.get_response();
+}
+
+bool RebootThread::HasRun() { return m_status.get_reboot_count() > 0; }
+
+// Blocks in select until either the stop event or the timer fires.
+// Returns EXIT_EARLY on m_stop and PROCEED when l_timer expires. Select
+// errors are logged and the wait continues.
+Progress RebootThread::platform_reboot_select(swss::Select &s,
+                                              swss::SelectableTimer &l_timer) {
+  SWSS_LOG_ENTER();
+
+  while (true) {
+    swss::Selectable *sel = nullptr;
+    const int select_ret = s.select(&sel);
+
+    if (select_ret == swss::Select::ERROR) {
+      SWSS_LOG_NOTICE("Error: %s!", strerror(errno));
+    } else if (select_ret == swss::Select::OBJECT) {
+      if (sel == &m_stop) {
+        // SIGTERM expected after platform reboot request
+        SWSS_LOG_NOTICE(
+            "m_stop rx'd (SIGTERM) while waiting for platform reboot");
+        return Progress::EXIT_EARLY;
+      } else if (sel == &l_timer) {
+        return Progress::PROCEED;
+      }
+    }
+  }
+}
+
+// Arms a timer of m_reboot_timeout seconds on `s`, waits for it or for a
+// stop request, then removes the timer from `s` again.
+Progress RebootThread::wait_for_platform_reboot(swss::Select &s) {
+  SWSS_LOG_ENTER();
+
+  // Sleep for a long time: 260 seconds.
+  // During this time platform should kill us as part of reboot.
+  swss::SelectableTimer l_timer(
+      timespec{.tv_sec = m_reboot_timeout, .tv_nsec = 0});
+  s.addSelectable(&l_timer);
+
+  l_timer.start();
+
+  Progress progress = platform_reboot_select(s, l_timer);
+
+  l_timer.stop();
+  s.removeSelectable(&l_timer);
+  return progress;
+}
+
+// Body of the reboot thread: dispatches on the requested reboot method.
+void RebootThread::do_reboot(void) {
+  SWSS_LOG_ENTER();
+
+  swss::Select s;
+  s.addSelectable(&m_stop);
+
+  // Check if stop was requested before Selectable was setup
+  if (sigterm_requested) {
+    SWSS_LOG_ERROR("sigterm_requested was raised, exiting");
+    return;
+  }
+
+  if (m_request.method() == RebootMethod::COLD) {
+    do_cold_reboot(s);
+  } else {
+    // check_start_preconditions() also admits WARM, which has no handler
+    // here. Record the failure so the reboot status does not stay UNKNOWN
+    // for a request that was accepted but never executed.
+    log_error_and_set_non_retry_failure(
+        "Received unrecognized method type = " +
+        RebootMethod_Name(m_request.method()));
+  }
+}
+
+// Serialises m_request to JSON and sends it to the reboot host service.
+// Returns EXIT_EARLY (with a non-retriable failure recorded) when the
+// conversion or the host-service call fails, PROCEED otherwise.
+RebootThread::Progress RebootThread::send_dbus_reboot_request() {
+  SWSS_LOG_ENTER();
+  SWSS_LOG_NOTICE("Sending reboot request to platform");
+
+  std::string json_string;
+  gpu::Status status = gpu::MessageToJsonString(m_request, &json_string);
+  if (!status.ok()) {
+    std::string error_string = "unable to convert reboot protobuf to json: " +
+                               status.message().as_string();
+    log_error_and_set_non_retry_failure(error_string);
+    return Progress::EXIT_EARLY;
+  }
+
+  // Send the reboot request to the reboot host service via dbus.
+  DbusInterface::DbusResponse dbus_response =
+      m_dbus_interface.Reboot(json_string);
+
+  if (dbus_response.status == DbusInterface::DbusStatus::DBUS_FAIL) {
+    log_error_and_set_non_retry_failure(dbus_response.json_string);
+    return Progress::EXIT_EARLY;
+  }
+  return Progress::PROCEED;
+}
+
+// Asks the platform for a cold reboot and then waits to be killed by it.
+// Reaching the end of this function means the platform did not reboot.
+void RebootThread::do_cold_reboot(swss::Select &s) {
+  SWSS_LOG_ENTER();
+  SWSS_LOG_NOTICE("Sending cold reboot request to platform");
+  if (send_dbus_reboot_request() == Progress::EXIT_EARLY) {
+    return;
+  }
+
+  // Wait for platform to reboot. If we return, reboot failed.
+  if (wait_for_platform_reboot(s) == Progress::EXIT_EARLY) {
+    return;
+  }
+
+  // We shouldn't be here. Platform reboot should've killed us.
+  log_error_and_set_non_retry_failure("platform failed to reboot");
+
+  // Set critical state
+  //m_critical_interface.report_critical_state("platform failed to reboot");
+  return;
+}
+
+// Thread entry point.
+void RebootThread::reboot_thread(void) {
+  SWSS_LOG_ENTER();
+
+  // An exception leaving a std::thread entry point calls std::terminate, so
+  // turn it into a recorded failure and still signal completion below.
+  try {
+    do_reboot();
+  } catch (const std::exception &e) {
+    log_error_and_set_non_retry_failure(
+        std::string("Exception in reboot thread: ") + e.what());
+  }
+
+  // Notify calling thread that reboot thread has exited.
+  // Calling thread will call Join(): join and set thread status to inactive.
+  m_finished.notify();
+}
+
+
+// Validates a reboot request before the thread is started.
+// request: the request to check (input only).
+// response: output only; set to SWSS_RC_SUCCESS with an empty string when
+//   the request is acceptable, otherwise to the rejection code and reason.
+// Returns true when the preconditions are met, false otherwise.
+bool RebootThread::check_start_preconditions(const RebootRequest &request,
+                                             NotificationResponse &response) {
+  // `response` is documented as output only, so do not depend on whatever
+  // value the caller passed in.
+  response.status = swss::StatusCode::SWSS_RC_SUCCESS;
+  response.json_string.clear();
+
+  // We have to join a previous executing thread before restarting.
+  // Active is cleared in Join.
+  // NOTE(review): WARM passes the method check below, but do_reboot() in
+  // this file only dispatches COLD - confirm whether WARM should be
+  // rejected here.
+  if (m_status.get_active()) {
+    response.json_string = "RebootThread: can't Start while active";
+    response.status = swss::StatusCode::SWSS_RC_IN_USE;
+  } else if (request.method() != RebootMethod::COLD &&
+             request.method() != RebootMethod::WARM) {
+    response.json_string = "RebootThread: Start rx'd unsupported method";
+    response.status = swss::StatusCode::SWSS_RC_INVALID_PARAM;
+  } else if (request.delay() != 0) {
+    response.json_string = "RebootThread: delayed start not supported";
+    response.status = swss::StatusCode::SWSS_RC_INVALID_PARAM;
+  }
+
+  if (response.status == swss::StatusCode::SWSS_RC_SUCCESS) {
+    return true;
+  }
+
+  SWSS_LOG_ERROR("%s", response.json_string.c_str());
+  // Log the reboot request contents.
+  gpu::Status status;
+  std::string json_request;
+  status = gpu::MessageToJsonString(request, &json_request);
+  if (status.ok()) {
+    SWSS_LOG_ERROR("check_start_preconditions: RebootRequest = %s",
+                   json_request.c_str());
+  } else {
+    SWSS_LOG_ERROR(
+        "check_start_preconditions: error calling MessageToJsonString");
+  }
+  return false;
+}
+
+
+// Validates the request, records the start status and launches the reboot
+// thread. The returned response reports only precondition failures; errors
+// after that point are reported through the reboot status.
+NotificationResponse RebootThread::Start(const RebootRequest &request) {
+  SWSS_LOG_ENTER();
+
+  NotificationResponse response = {.status = swss::StatusCode::SWSS_RC_SUCCESS,
+                                   .json_string = ""};
+
+  // Confirm we're not running, method is supported and we're not delayed.
+  if (!check_start_preconditions(request, response)) {
+    // Errors logged in check_start_preconditions.
+    return response;
+  }
+
+  m_request = request;
+
+  // From this point errors will be reported via RebootStatusRequest.
+  m_status.set_start_status(request.method(), request.message());
+
+  try {
+    m_thread = std::thread(&RebootThread::reboot_thread, this);
+  } catch (const std::system_error &e) {
+    std::string error_string = "Exception launching reboot thread: ";
+    error_string += e.what();
+    log_error_and_set_failure_as_retriable(error_string);
+
+    // Notify calling thread that thread has finished.
+    // Calling thread MUST call Join, which will join and clear active bit.
+    m_finished.notify();
+  }
+  return response;
+}
+
+// Logs error_string and records it as a non-retriable reboot failure.
+void RebootThread::log_error_and_set_non_retry_failure(
+    const std::string error_string) {
+  SWSS_LOG_ENTER();
+  SWSS_LOG_ERROR("%s", error_string.c_str());
+  m_status.set_completed_status(
+      RebootStatus_Status::RebootStatus_Status_STATUS_FAILURE, error_string);
+}
+
+// Logs error_string and records it as a retriable reboot failure.
+void RebootThread::log_error_and_set_failure_as_retriable(
+    const std::string error_string) {
+  SWSS_LOG_ENTER();
+  SWSS_LOG_ERROR("%s", error_string.c_str());
+  m_status.set_completed_status(
+      RebootStatus_Status::RebootStatus_Status_STATUS_RETRIABLE_FAILURE,
+      error_string);
+}
+
+}  // namespace rebootbackend
diff --git a/src/sonic-framework/rebootbackend/reboot_thread.h b/src/sonic-framework/rebootbackend/reboot_thread.h
new file mode 100644
index 000000000000..bd0b54aed2a2
--- /dev/null
+++ b/src/sonic-framework/rebootbackend/reboot_thread.h
@@ -0,0 +1,222 @@
+#pragma once
+
+#include <chrono>
+#include <mutex>
+#include <thread>
+
+#include "dbconnector.h"
+#include "notificationproducer.h"
+#include "reboot_common.h"
+#include "reboot_interfaces.h"
+#include "redis_utils.h"
+#include "select.h"
+#include "selectableevent.h"
+#include "selectabletimer.h"
+#include "subscriberstatetable.h"
+#include "system/system.pb.h"
+
+namespace rebootbackend {
+
+#define SELECT_TIMEOUT_250_MS (250)
+#define SELECT_TIMEOUT_500_MS (500)
+
+// Hold/manage the contents of a RebootStatusResponse as defined
+// in system.proto
+// Thread-safe: expectation is one thread will write and multiple
+// threads can read.
+// Every access to m_proto_status takes m_mutex through a scoped guard, so
+// the mutex is released even if a protobuf setter throws.
+class ThreadStatus {
+ public:
+  ThreadStatus() {
+    m_proto_status.set_active(false);
+
+    // Reason for reboot as specified in message from a RebootRequest.
+    // This is "message" in RebootRequest.
+    m_proto_status.set_reason("");
+
+    // Number of reboots since active.
+    m_proto_status.set_count(0);
+
+    // RebootMethod is the type of reboot (cold, warm, fast) from a
+    // RebootRequest
+    m_proto_status.set_method(gnoi::system::RebootMethod::UNKNOWN);
+
+    // Status can be UNKNOWN, SUCCESS, RETRIABLE_FAILURE or FAILURE.
+    m_proto_status.mutable_status()->set_status(
+        gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN);
+
+    // In the event of error: message is human readable error explanation.
+    m_proto_status.mutable_status()->set_message("");
+  }
+
+  // Marks a reboot as started: sets active, stores method and reason,
+  // increments the reboot count, resets the result to UNKNOWN and stamps
+  // the start time.
+  void set_start_status(const gnoi::system::RebootMethod &method,
+                        const std::string &reason) {
+    const std::lock_guard<std::mutex> lock(m_mutex);
+
+    m_proto_status.set_active(true);
+    m_proto_status.set_reason(reason);
+    m_proto_status.set_count(m_proto_status.count() + 1);
+    m_proto_status.set_method(method);
+    m_proto_status.mutable_status()->set_status(
+        gnoi::system::RebootStatus_Status::RebootStatus_Status_STATUS_UNKNOWN);
+    m_proto_status.mutable_status()->set_message("");
+
+    // Record when the reboot starts: nanoseconds since the system_clock
+    // epoch.
+    std::chrono::nanoseconds ns =
+        std::chrono::system_clock::now().time_since_epoch();
+    m_proto_status.set_when(ns.count());
+  }
+
+  bool get_active(void) {
+    const std::lock_guard<std::mutex> lock(m_mutex);
+    return m_proto_status.active();
+  }
+
+  // Stores the outcome of a reboot attempt. Ignored unless a reboot is
+  // currently active.
+  void set_completed_status(const gnoi::system::RebootStatus_Status &status,
+                            const std::string &message) {
+    const std::lock_guard<std::mutex> lock(m_mutex);
+
+    // Status should only be updated while reboot is active
+    if (m_proto_status.active()) {
+      m_proto_status.mutable_status()->set_status(status);
+      m_proto_status.mutable_status()->set_message(message);
+    }
+  }
+
+  void set_inactive(void) {
+    const std::lock_guard<std::mutex> lock(m_mutex);
+    m_proto_status.set_active(false);
+  }
+
+  int get_reboot_count() {
+    const std::lock_guard<std::mutex> lock(m_mutex);
+    return m_proto_status.count();
+  }
+
+  gnoi::system::RebootStatus_Status get_last_reboot_status(void) {
+    gnoi::system::RebootStatusResponse response = get_response();
+    return response.status().status();
+  }
+
+  // Returns a copy of the stored status, adjusted for reporting: while
+  // active the result fields are reported as UNKNOWN/empty, while inactive
+  // `when` is reported as 0.
+  gnoi::system::RebootStatusResponse get_response(void) {
+    gnoi::system::RebootStatusResponse lstatus;
+    {
+      // Hold the lock only for the copy.
+      const std::lock_guard<std::mutex> lock(m_mutex);
+      lstatus = m_proto_status;
+    }
+
+    if (lstatus.active()) {
+      // RebootStatus isn't applicable if we're active
+      lstatus.mutable_status()->set_status(
+          gnoi::system::RebootStatus_Status::
+              RebootStatus_Status_STATUS_UNKNOWN);
+      lstatus.mutable_status()->set_message("");
+    } else {
+      // When is only valid while we're active (since delayed
+      // start isn't supported). Value is set when reboot begins.
+      lstatus.set_when(0);
+    }
+
+    return lstatus;
+  }
+
+ private:
+  std::mutex m_mutex;
+  gnoi::system::RebootStatusResponse m_proto_status;
+};
+
+// RebootThread performs reboot actions leading up to a platform
+// request to reboot.
+// thread-compatible: expectation is Stop, Start and Join will be
+// called from the same thread.
+class RebootThread {
+ public:
+  enum class Status { SUCCESS, FAILURE, KEEP_WAITING };
+  enum class Progress { PROCEED, EXIT_EARLY };
+
+  // interface: dbus reboot host service access
+  // m_finished: let launching task know thread has finished
+  RebootThread(DbusInterface &dbus_interface,
+               swss::SelectableEvent &m_finished);
+
+  NotificationResponse Start(const gnoi::system::RebootRequest &request);
+
+  // Request thread stop/exit. Only used when platform is shutting down
+  // all containers/processes.
+  void Stop(void);
+
+  // Called by launching task after notification sent to m_finished.
+  bool Join(void);
+
+  // Return Status of last reboot attempt
+  gnoi::system::RebootStatusResponse GetResponse();
+
+  // Returns true if the RebootThread has been started since the last reboot,
+  // and false otherwise.
+  bool HasRun();
+
+ private:
+  void reboot_thread(void);
+  void do_reboot(void);
+  Progress send_dbus_reboot_request();
+  void do_cold_reboot(swss::Select &s);
+
+  // Inner loop select handler to wait for platform reboot.
+  //   wait for timeout
+  //   wait for a stop request (sigterm)
+  // Returns:
+  //   EXIT_EARLY: a stop request (m_stop) was received
+  //   PROCEED: if reboot timeout expired
+  Progress platform_reboot_select(swss::Select &s,
+                                  swss::SelectableTimer &l_timer);
+
+  // Wait for platform to reboot while waiting for possible stop
+  // Returns:
+  //   EXIT_EARLY: a stop request (m_stop) was received
+  //   PROCEED: if reboot timeout expired
+  Progress wait_for_platform_reboot(swss::Select &s);
+
+  // Log error string, set status to RebootStatus_Status_STATUS_FAILURE
+  // Set status message to error_string.
+  void log_error_and_set_non_retry_failure(const std::string error_string);
+
+  // Log error string, set status to
+  // RebootStatus_Status_STATUS_RETRIABLE_FAILURE. Set status message to
+  // error_string.
+  void log_error_and_set_failure_as_retriable(const std::string error_string);
+
+
+  // Request is input only.
+  // Response is output only.
+  // Return true if preconditions met, false otherwise.
+  bool check_start_preconditions(const gnoi::system::RebootRequest &request,
+                                 NotificationResponse &response);
+  std::thread m_thread;
+
+  // Signal m_finished to let main thread know we've completed.
+  // Main thread should call Join.
+  swss::SelectableEvent &m_finished;
+
+  // m_stop signalled by main thread on sigterm: cleanup and exit.
+  swss::SelectableEvent m_stop;
+  // Host-service access used to send the reboot request.
+  DbusInterface &m_dbus_interface;
+  // Connection to STATE_DB, opened in the constructor.
+  swss::DBConnector m_db;
+  // Status reported through GetResponse().
+  ThreadStatus m_status;
+  // Copy of the request handed to Start().
+  gnoi::system::RebootRequest m_request;
+
+  // Wait for system to reboot: allow unit test to shorten.
+  // The value is used as the tv_sec of the wait timer, i.e. seconds.
+  // TODO: there is a plan to make these timer values
+  // available in CONFIG_DB
+  static constexpr uint32_t kRebootTime = 260;
+  long m_reboot_timeout = kRebootTime;
+
+  friend class RebootBETestWithoutStop;
+  friend class RebootThreadTest;
+};
+
+}  // namespace rebootbackend
diff --git a/src/sonic-framework/rebootbackend/rebootbackend.cpp b/src/sonic-framework/rebootbackend/rebootbackend.cpp
new file mode 100644
index 000000000000..5b48d182f87f
--- /dev/null
+++ b/src/sonic-framework/rebootbackend/rebootbackend.cpp
@@ -0,0 +1,12 @@
+#include "interfaces.h"
+#include "reboot_interfaces.h"
+#include "rebootbe.h"
+
+using namespace ::rebootbackend;
+
+// Entry point: wires the D-Bus host-service client into RebootBE and runs
+// its loop until it returns.
+// NOTE(review): nothing here sets sigterm_requested or calls
+// RebootBE::Stop(); confirm where SIGTERM handling is installed.
+int main(int argc, char** argv) {
+  HostServiceDbus dbus_interface;
+  RebootBE rebootbe(dbus_interface);
+  rebootbe.Start();
+  return 0;
+}
diff --git a/src/sonic-framework/rebootbackend/rebootbe.cpp b/src/sonic-framework/rebootbackend/rebootbe.cpp
new file mode 100644
index 000000000000..be8a55230e02
--- /dev/null
+++ b/src/sonic-framework/rebootbackend/rebootbe.cpp
@@ -0,0 +1,284 @@
+#include "rebootbe.h"
+// NOTE(review): the five #include directives below have no target in this
+// copy of the patch; restore them from the original commit.
+#include
+#include
+
+#include
+#include
+#include
+
+#include "logger.h"
+#include "notificationconsumer.h"
+#include "notificationproducer.h"
+#include "reboot_common.h"
+#include "reboot_interfaces.h"
+#include "select.h"
+#include "status_code_util.h"
+
+namespace rebootbackend {
+
+namespace gpu = ::google::protobuf::util;
+
+// Definition of the flag declared in reboot_common.h.
+bool sigterm_requested = false;
+
+// Connects to STATE_DB, creates the response producer and the request
+// consumer on their notification channels, and hands dbus_interface to the
+// reboot thread.
+RebootBE::RebootBE(DbusInterface &dbus_interface)
+    : m_db("STATE_DB", 0),
+      m_rebootResponse(&m_db, REBOOT_RESPONSE_NOTIFICATION_CHANNEL),
+      m_notificationConsumer(&m_db, REBOOT_REQUEST_NOTIFICATION_CHANNEL),
+      m_dbus(dbus_interface),
+      m_reboot_thread(dbus_interface,
+                      m_reboot_thread_finished) {
+  swss::Logger::linkToDbNative("rebootbackend");
+}
+
+// Returns the current manager status, read under m_status_mutex.
+RebootBE::RebManagerStatus RebootBE::GetCurrentStatus() {
+  const std::lock_guard lock(m_status_mutex);
+  return m_current_status;
+}
+
+// Replaces the current manager status under m_status_mutex.
+void RebootBE::SetCurrentStatus(RebManagerStatus new_status) {
+
const std::lock_guard<std::mutex> lock(m_status_mutex);
+  m_current_status = new_status;
+}
+
+// Main loop of the backend. Waits on three selectables and dispatches:
+//   m_notificationConsumer   -> do_task()
+//   m_reboot_thread_finished -> handle_reboot_finish()
+//   m_done                   -> handle_done(), then the loop ends
+// Select errors are logged and the loop continues.
+void RebootBE::Start() {
+  SWSS_LOG_ENTER();
+  SWSS_LOG_NOTICE("--- Starting rebootbackend ---");
+
+  swss::WarmStart::initialize("rebootbackend", "sonic-framework");
+  swss::WarmStart::checkWarmStart("rebootbackend", "sonic-framework",
+                                  /*incr_restore_cnt=*/false);
+
+  swss::Select s;
+  s.addSelectable(&m_notificationConsumer);
+  s.addSelectable(&m_done);
+  s.addSelectable(&m_reboot_thread_finished);
+
+  SWSS_LOG_NOTICE("RebootBE entering operational loop");
+  while (true) {
+    swss::Selectable *sel = nullptr;
+    const int ret = s.select(&sel);
+
+    if (ret == swss::Select::ERROR) {
+      SWSS_LOG_NOTICE("Error: %s!", strerror(errno));
+    } else if (ret == swss::Select::OBJECT) {
+      if (sel == &m_notificationConsumer) {
+        do_task(m_notificationConsumer);
+      } else if (sel == &m_reboot_thread_finished) {
+        handle_reboot_finish();
+      } else if (sel == &m_done) {
+        handle_done();
+        break;
+      }
+    }
+  }
+  return;
+}
+
+// Asks the loop in Start() to exit by signalling m_done.
+void RebootBE::Stop() {
+  SWSS_LOG_ENTER();
+  m_done.notify();
+  return;
+}
+
+// Pops one notification from `consumer`.
+// request.op receives the operation name; request.ret_string receives the
+// value of the DATA_TUPLE_KEY field.
+// Returns true when that field is present, false otherwise (request.op is
+// still filled in, request.ret_string stays empty).
+bool RebootBE::retrieve_notification_data(
+    swss::NotificationConsumer &consumer,
+    RebootBE::NotificationRequest &request) {
+  SWSS_LOG_ENTER();
+
+  request.op = "";
+  request.ret_string = "";
+
+  std::string data;
+  std::vector<swss::FieldValueTuple> values;
+  consumer.pop(request.op, data, values);
+
+  for (const auto &fv : values) {
+    if (DATA_TUPLE_KEY == fvField(fv)) {
+      request.ret_string = fvValue(fv);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Send a response on the Reboot_Response_Channel notification channel.
+// Key is one of: Reboot, RebootStatus, or CancelReboot
+// code is swss::StatusCode, hopefully SWSS_RC_SUCCESS.
+// message is json formatted RebootResponse, RebootStatusResponse
+// or CancelRebootResponse as defined in system.proto
+void RebootBE::send_notification_response(const std::string key,
+                                          const swss::StatusCode code,
+                                          const std::string message) {
+  SWSS_LOG_ENTER();
+
+  std::vector<swss::FieldValueTuple> ret_values;
+  ret_values.push_back(swss::FieldValueTuple(DATA_TUPLE_KEY, message));
+
+  m_rebootResponse.send(key, swss::statusCodeToStr(code), ret_values);
+}
+
+// Handles a "Reboot" notification.
+// json_reboot_request: JSON form of a gnoi::system::RebootRequest.
+// Returns SWSS_RC_INTERNAL when the JSON cannot be parsed, SWSS_RC_IN_USE
+// when a reboot is not allowed right now, otherwise whatever
+// RebootThread::Start() reports. On a successful start the manager status
+// becomes COLD_/WARM_REBOOT_IN_PROGRESS.
+NotificationResponse RebootBE::handle_reboot_request(
+    const std::string &json_reboot_request) {
+  using namespace google::protobuf::util;
+
+  SWSS_LOG_ENTER();
+
+  // On success an empty string is returned. RebootResponse in system.proto
+  // is an empty proto.
+  NotificationResponse response = {.status = swss::StatusCode::SWSS_RC_SUCCESS,
+                                   .json_string = ""};
+
+  gnoi::system::RebootRequest request;
+  Status status = gpu::JsonStringToMessage(json_reboot_request, &request);
+
+  if (!status.ok()) {
+    std::string error_string =
+        "unable to convert json to rebootRequest protobuf: " +
+        status.message().as_string();
+    SWSS_LOG_ERROR("%s", error_string.c_str());
+    SWSS_LOG_ERROR("json = |%s|", json_reboot_request.c_str());
+    // Two separate statements; the original joined them with a comma.
+    response.status = swss::StatusCode::SWSS_RC_INTERNAL;
+    response.json_string = error_string;
+    return response;
+  }
+
+  if (!reboot_allowed(request.method())) {
+    response.status = swss::StatusCode::SWSS_RC_IN_USE;
+    response.json_string =
+        "Reboot not allowed at this time. Reboot or "
+        "post-warmboot in progress";
+    SWSS_LOG_WARN("%s", response.json_string.c_str());
+    return response;
+  }
+
+  SWSS_LOG_NOTICE("Forwarding request to RebootThread: %s",
+                  request.DebugString().c_str());
+  response = m_reboot_thread.Start(request);
+  if (response.status == swss::StatusCode::SWSS_RC_SUCCESS) {
+    if (request.method() == gnoi::system::RebootMethod::COLD) {
+      SetCurrentStatus(RebManagerStatus::COLD_REBOOT_IN_PROGRESS);
+    } else if (request.method() == gnoi::system::RebootMethod::WARM) {
+      SetCurrentStatus(RebManagerStatus::WARM_REBOOT_IN_PROGRESS);
+    }
+  }
+  return response;
+}
+
+// Decides whether a reboot of the given method may start now:
+//   reboot in progress -> never
+//   WARM_INIT_WAIT     -> only COLD
+//   IDLE / anything else -> always
+bool RebootBE::reboot_allowed(const gnoi::system::RebootMethod reboot_method) {
+  RebManagerStatus current_status = GetCurrentStatus();
+  switch (current_status) {
+    case RebManagerStatus::COLD_REBOOT_IN_PROGRESS:
+    case RebManagerStatus::WARM_REBOOT_IN_PROGRESS: {
+      return false;
+    }
+    case RebManagerStatus::WARM_INIT_WAIT: {
+      return reboot_method == gnoi::system::RebootMethod::COLD;
+    }
+    case RebManagerStatus::IDLE: {
+      return true;
+    }
+    default: {
+      return true;
+    }
+  }
+}
+
+// Handles a "RebootStatus" notification: returns the reboot thread's
+// status as JSON, or SWSS_RC_INTERNAL with an error text when the status
+// cannot be serialised. json_status_request is not inspected.
+NotificationResponse RebootBE::handle_status_request(
+    const std::string &json_status_request) {
+  SWSS_LOG_ENTER();
+
+  gnoi::system::RebootStatusResponse reboot_response =
+      m_reboot_thread.GetResponse();
+
+  std::string json_reboot_response_string;
+  google::protobuf::util::Status status =
+      gpu::MessageToJsonString(reboot_response, &json_reboot_response_string);
+
+  NotificationResponse response;
+  if (status.ok()) {
+    response.status = swss::StatusCode::SWSS_RC_SUCCESS;
+    response.json_string = json_reboot_response_string;
+  } else {
+    std::string error_string =
+        "unable to convert reboot status response protobuf to json: " +
+        status.message().as_string();
+    SWSS_LOG_ERROR("%s", error_string.c_str());
+    response.status = swss::StatusCode::SWSS_RC_INTERNAL;
+    response.json_string = error_string;
+  }
+
+  return response;
+}
+
+NotificationResponse RebootBE::handle_cancel_request(
+
const std::string &json_cancel_request) { + SWSS_LOG_ENTER(); + + NotificationResponse response; + + // CancelReboot isn't supported: not needed until/unless delayed support + // is added: return unimplemented. + response.status = swss::StatusCode::SWSS_RC_UNIMPLEMENTED; + response.json_string = "Cancel reboot isn't supported"; + SWSS_LOG_WARN("%s", response.json_string.c_str()); + return response; +} + +void RebootBE::do_task(swss::NotificationConsumer &consumer) { + SWSS_LOG_ENTER(); + + NotificationResponse response; + RebootBE::NotificationRequest request; + + if (!retrieve_notification_data(consumer, request)) { + // Response is simple string (not json) on error. + response.json_string = + "MESSAGE not present in reboot notification request message, op = " + + request.op; + SWSS_LOG_ERROR("%s", response.json_string.c_str()); + response.status = swss::StatusCode::SWSS_RC_INVALID_PARAM; + } else if (request.op == REBOOT_KEY) { + response = handle_reboot_request(request.ret_string); + } else if (request.op == REBOOT_STATUS_KEY) { + response = handle_status_request(request.ret_string); + } else if (request.op == CANCEL_REBOOT_KEY) { + response = handle_cancel_request(request.ret_string); + } else { + // Response is simple string (not json) on error. + response.json_string = + "Unrecognized op in reboot request, op = " + request.op; + SWSS_LOG_ERROR("%s", response.json_string.c_str()); + response.status = swss::StatusCode::SWSS_RC_INVALID_PARAM; + } + send_notification_response(request.op, response.status, response.json_string); +} + +void RebootBE::handle_init_finish() { + SWSS_LOG_ENTER(); + SWSS_LOG_NOTICE("Receieved notification that InitThread is done"); + RebManagerStatus current_status = GetCurrentStatus(); + if (current_status == RebManagerStatus::WARM_INIT_WAIT) { + SetCurrentStatus(RebManagerStatus::IDLE); + } +} + +void RebootBE::handle_reboot_finish() { + SWSS_LOG_ENTER(); + SWSS_LOG_WARN( + "Receieved notification that reboot has finished. 
This probably means " + "something is wrong"); + m_reboot_thread.Join(); + SetCurrentStatus(RebManagerStatus::IDLE); +} + +void RebootBE::handle_done() { + SWSS_LOG_INFO("RebootBE received signal to stop"); + + if (m_reboot_thread.GetResponse().active()) { + m_reboot_thread.Stop(); + m_reboot_thread.Join(); + } +} + +} // namespace rebootbackend diff --git a/src/sonic-framework/rebootbackend/rebootbe.h b/src/sonic-framework/rebootbackend/rebootbe.h new file mode 100644 index 000000000000..10913bd30bf5 --- /dev/null +++ b/src/sonic-framework/rebootbackend/rebootbe.h @@ -0,0 +1,93 @@ +#pragma once +#include "dbconnector.h" +#include "notificationconsumer.h" +#include "notificationproducer.h" +#include "reboot_common.h" +#include "reboot_interfaces.h" +#include "reboot_thread.h" +#include "selectableevent.h" +#include "status_code_util.h" + +namespace rebootbackend { + +#define REBOOT_REQUEST_NOTIFICATION_CHANNEL "Reboot_Request_Channel" +#define REBOOT_RESPONSE_NOTIFICATION_CHANNEL "Reboot_Response_Channel" +#define REBOOT_KEY "Reboot" +#define REBOOT_STATUS_KEY "RebootStatus" +#define CANCEL_REBOOT_KEY "CancelReboot" +#define DATA_TUPLE_KEY "MESSAGE" + +class RebootBE { + public: + struct NotificationRequest { + std::string op; + std::string ret_string; + }; + + enum class RebManagerStatus { + WARM_INIT_WAIT, + IDLE, + COLD_REBOOT_IN_PROGRESS, + WARM_REBOOT_IN_PROGRESS + }; + + RebootBE(DbusInterface &interface); + + RebManagerStatus GetCurrentStatus(); + + void Start(); + void Stop(); + + private: + std::mutex m_status_mutex; + RebManagerStatus m_current_status = RebManagerStatus::IDLE; + swss::SelectableEvent m_done; + + swss::DBConnector m_db; + swss::NotificationProducer m_rebootResponse; + swss::NotificationConsumer m_notificationConsumer; + + DbusInterface &m_dbus; + + // Signals for init thread. + swss::SelectableEvent m_init_thread_done; + + // Signalled by reboot thread when thread completes. 
+ swss::SelectableEvent m_reboot_thread_finished; + RebootThread m_reboot_thread; + + void SetCurrentStatus(RebManagerStatus new_status); + + // Reboot_Request_Channel notifications should all contain {"MESSAGE" : Data} + // in the notification Data field. + // Return true if "MESSAGE" is found, false otherwise. + // Set message_value to the Data string if found, "" otherwise. + // consumer is input: this is the consumer from which we pop + // reboot/cancel/status requests. + // request is output: this is the request received from consumer + bool retrieve_notification_data(swss::NotificationConsumer &consumer, + NotificationRequest &request); + NotificationResponse handle_reboot_request( + const std::string &json_reboot_request); + NotificationResponse handle_status_request( + const std::string &json_status_request); + NotificationResponse handle_cancel_request( + const std::string &json_cancel_request); + void send_notification_response(const std::string key, + const swss::StatusCode code, + const std::string message); + + // Returns true if a reboot is allowed at this time given the current + // warm manager state and reboot type, and false otherwise. 
+ bool reboot_allowed(const gnoi::system::RebootMethod reboot_method); + + void do_task(swss::NotificationConsumer &consumer); + + void handle_init_finish(); + void handle_reboot_finish(); + void handle_done(); + + friend class RebootBETestWithoutStop; +}; + +} // namespace rebootbackend diff --git a/src/sonic-framework/rebootbackend/redis_utils.cpp b/src/sonic-framework/rebootbackend/redis_utils.cpp new file mode 100644 index 000000000000..4010e99d423a --- /dev/null +++ b/src/sonic-framework/rebootbackend/redis_utils.cpp @@ -0,0 +1,80 @@ +#include "redis_utils.h" + +#include +#include +#include +#include + +#include "dbconnector.h" +#include "notificationproducer.h" +//#include "stateverification.h" +#include "table.h" +#include "timestamp.h" +#include "warm_restart.h" + +namespace rebootbackend { + +using WarmStartState = ::swss::WarmStart::WarmStartState; + + +void init_warm_reboot_states(const swss::DBConnector &db) { + swss::Table table(&db, STATE_WARM_RESTART_TABLE_NAME); + std::vector keys; + + table.getKeys(keys); + for (auto &key : keys) { + table.hdel(key, "state"); + table.hdel(key, "timestamp"); + } +} + +void set_warm_restart_enable(const swss::DBConnector &db, bool enabled) { + swss::Table table(&db, STATE_WARM_RESTART_ENABLE_TABLE_NAME); + table.hset("system", "enable", enabled ? "true" : "false"); +} + +bool is_valid_key(const std::string &key, const std::string &separator) { + if (separator.empty()) { + return false; + } + + size_t pos = key.find(separator); + // The separator must exist in the string, and cannot be the first or last + // character. 
+ return !(pos == std::string::npos || pos == 0 || pos == key.size() - 1); +} + +bool get_docker_app_from_key(const std::string &key, + const std::string &separator, std::string &docker, + std::string &app) { + SWSS_LOG_ENTER(); + + size_t pos = key.find(separator); + + if (separator.empty()) { + SWSS_LOG_ERROR("separator [%s] shouldn't be empty", separator.c_str()); + return false; + } + + if (pos == std::string::npos) { + SWSS_LOG_ERROR("key [%s] should contain separator [%s]", key.c_str(), + separator.c_str()); + return false; + } + + docker = key.substr(0, pos); + app = key.substr(pos + separator.length(), std::string::npos); + + if (docker.empty()) { + SWSS_LOG_ERROR("docker name shouldn't be empty, key = %s", key.c_str()); + return false; + } + + if (app.empty()) { + SWSS_LOG_ERROR("app name shouldn't be empty, key = %s", key.c_str()); + return false; + } + return true; +} + +} // namespace rebootbackend diff --git a/src/sonic-framework/rebootbackend/redis_utils.h b/src/sonic-framework/rebootbackend/redis_utils.h new file mode 100644 index 000000000000..05d87c2aef1c --- /dev/null +++ b/src/sonic-framework/rebootbackend/redis_utils.h @@ -0,0 +1,40 @@ +#pragma once +#include +#include +#include + +#include "dbconnector.h" +#include "notificationconsumer.h" +#include "notificationproducer.h" +#include "selectableevent.h" +#include "status_code_util.h" +#include "warm_restart.h" + +namespace rebootbackend { + +// Return string corresponding to state +std::string get_warm_start_state_name( + const swss::WarmStart::WarmStartState state); + +void init_warm_reboot_states(const swss::DBConnector &db); + +// Set the system warm start state to a new enabled/disabled state. +// STATE_WARM_RESTART_ENABLE_TABLE_NAME +// key = system, field = enable, value = "true"/"false" +void set_warm_restart_enable(const swss::DBConnector &db, bool enabled); + +// Returns true if key is in the form "text<separator>text", and false + // otherwise. 
+bool is_valid_key(const std::string &key, const std::string &separator); + +// Helper function: given key of form "docker|app" +// extract docker and app. (separator = | in this case) +// return false if docker or app are empty or separator +// isn't present, else true. +// key and separator are inputs +// docker and app are outputs +bool get_docker_app_from_key(const std::string &key, + const std::string &separator, std::string &docker, + std::string &app); + +} // namespace rebootbackend