From a9cc04df59228c03ca5809674a94a6f5e05710a6 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Wed, 1 Jan 2025 04:50:37 +0000 Subject: [PATCH 1/3] Avoid segment fault in ASIC/SDK health event handling when vendor SAI passes an invalid timestamp Signed-off-by: Stephen Sun --- orchagent/switchorch.cpp | 11 ++++- tests/mock_tests/switchorch_ut.cpp | 69 +++++++++++++++++++----------- 2 files changed, 55 insertions(+), 25 deletions(-) diff --git a/orchagent/switchorch.cpp b/orchagent/switchorch.cpp index 64442c96af..741cb547d2 100644 --- a/orchagent/switchorch.cpp +++ b/orchagent/switchorch.cpp @@ -1115,8 +1115,17 @@ void SwitchOrch::onSwitchAsicSdkHealthEvent(sai_object_id_t switch_id, const string &severity_str = switch_asic_sdk_health_event_severity_reverse_map.at(severity); const string &category_str = switch_asic_sdk_health_event_category_reverse_map.at(category); string description_str; - const std::time_t &t = (std::time_t)timestamp.tv_sec; + std::time_t t = (std::time_t)timestamp.tv_sec; + const std::time_t now = std::time(0); + const double year_in_seconds = 86400 * 365; stringstream time_ss; + + if (difftime(t, now) > year_in_seconds) + { + SWSS_LOG_ERROR("Invalid timestamp second %lu in received ASIC/SDK health event, reset to current time", timestamp.tv_sec); + t = now; + } + time_ss << std::put_time(std::localtime(&t), "%Y-%m-%d %H:%M:%S"); switch (data.data_type) diff --git a/tests/mock_tests/switchorch_ut.cpp b/tests/mock_tests/switchorch_ut.cpp index a1e8b218e4..e46fb9d07b 100644 --- a/tests/mock_tests/switchorch_ut.cpp +++ b/tests/mock_tests/switchorch_ut.cpp @@ -124,6 +124,44 @@ namespace switchorch_test gSwitchOrch = new SwitchOrch(m_app_db.get(), switch_tables, stateDbSwitchTable); } + void checkAsicSdkHealthEvent(const sai_timespec_t ×tamp, const string &expected_key="") + { + initSwitchOrch(); + + sai_switch_health_data_t data; + memset(&data, 0, sizeof(data)); + data.data_type = SAI_HEALTH_DATA_TYPE_GENERAL; + vector data_from_sai({100, 101, 115, 99, 114, 105, 112, 116, 105, 245, 111, 110, 245, 10, 123, 125, 100, 100}); + sai_u8_list_t description; + description.list = data_from_sai.data(); + description.count = (uint32_t)(data_from_sai.size() - 2); + on_switch_asic_sdk_health_event(gSwitchId, + SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_FATAL, + timestamp, + SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW, + data, + description); + + string key; + if (expected_key.empty()) + { + vector keys; + gSwitchOrch->m_asicSdkHealthEventTable->getKeys(keys); + key = keys[0]; + } + else + { + key = expected_key; + } + string value; + gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "category", value); + ASSERT_EQ(value, "firmware"); + gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "severity", value); + ASSERT_EQ(value, "fatal"); + gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "description", value); + ASSERT_EQ(value, "description\n{}"); + } + void TearDown() override { ::testing_db::reset(); @@ -289,30 +327,13 @@ namespace switchorch_test TEST_F(SwitchOrchTest, SwitchOrchTestHandleEvent) { - initSwitchOrch(); - sai_timespec_t timestamp = {.tv_sec = 1701160447, .tv_nsec = 538710245}; - sai_switch_health_data_t data; - memset(&data, 0, sizeof(data)); - data.data_type = SAI_HEALTH_DATA_TYPE_GENERAL; - vector data_from_sai({100, 101, 115, 99, 114, 105, 112, 116, 105, 245, 111, 110, 245, 10, 123, 125, 100, 100}); - sai_u8_list_t description; - description.list = data_from_sai.data(); - description.count = (uint32_t)(data_from_sai.size() - 2); - on_switch_asic_sdk_health_event(gSwitchId, - SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_FATAL, - timestamp, - SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW, - data, - description); - - string key = "2023-11-28 08:34:07"; - string value; - gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "category", value); - ASSERT_EQ(value, "firmware"); - gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "severity", value); - ASSERT_EQ(value, "fatal"); - gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "description", value); - ASSERT_EQ(value, "description\n{}"); + checkAsicSdkHealthEvent(timestamp, "2023-11-28 08:34:07"); + } + + TEST_F(SwitchOrchTest, SwitchOrchTestHandleEventInvalidTimeStamp) + { + sai_timespec_t timestamp = {.tv_sec = 172479515853275099, .tv_nsec = 538710245}; + checkAsicSdkHealthEvent(timestamp); } } From f11bb67e8515634f595a9c973d566a4e9b4ddc29 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Wed, 1 Jan 2025 14:46:22 +0000 Subject: [PATCH 2/3] Fix compiling error on ARM Signed-off-by: Stephen Sun --- orchagent/switchorch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orchagent/switchorch.cpp b/orchagent/switchorch.cpp index 741cb547d2..46ca7fbbef 100644 --- a/orchagent/switchorch.cpp +++ b/orchagent/switchorch.cpp @@ -1122,7 +1122,7 @@ void SwitchOrch::onSwitchAsicSdkHealthEvent(sai_object_id_t switch_id, if (difftime(t, now) > year_in_seconds) { - SWSS_LOG_ERROR("Invalid timestamp second %lu in received ASIC/SDK health event, reset to current time", timestamp.tv_sec); + SWSS_LOG_ERROR("Invalid timestamp second %" PRIx64 " in received ASIC/SDK health event, reset to current time", timestamp.tv_sec); t = now; } From 66748fca4174d41db1cc6fdbc51bf20a062641c4 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Thu, 2 Jan 2025 10:42:37 +0000 Subject: [PATCH 3/3] Add comment Signed-off-by: Stephen Sun --- orchagent/switchorch.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/orchagent/switchorch.cpp b/orchagent/switchorch.cpp index 46ca7fbbef..5aeed420f2 100644 --- a/orchagent/switchorch.cpp +++ b/orchagent/switchorch.cpp @@ -1120,6 +1120,11 @@ void SwitchOrch::onSwitchAsicSdkHealthEvent(sai_object_id_t switch_id, const double year_in_seconds = 86400 * 365; stringstream time_ss; + /* + * In case vendor SAI passed a very large timestamp, put_time can cause segment fault which can not be caught by try/catch infra + * We check the difference between the timestamp from SAI and the current time and force to use current time if the gap is too large + * By doing so, we can avoid the segment fault + */ if (difftime(t, now) > year_in_seconds) { SWSS_LOG_ERROR("Invalid timestamp second %" PRIx64 " in received ASIC/SDK health event, reset to current time", timestamp.tv_sec);