From ddab1dc9b4e9cebdd3b5237c71a74f763e4e1e48 Mon Sep 17 00:00:00 2001
From: Bhadram Varka
Date: Fri, 11 Apr 2025 17:00:26 +0000
Subject: [PATCH] mgbe: Ensure UPHY is up before reporting link OK

Issue:
1) During switch reset, the MGBE MAC generates a Local Link Fault common
   interrupt.
2) The interrupt routine triggers restart_lane_bring_up(), which
   schedules the link monitor timer inside the Ethernet Server.
3) As part of this routine, link status change interrupts (e.g., Local
   Fault, Remote Fault, Link OK) are disabled to avoid spurious
   triggers.
4) Meanwhile, if a PTP Tx packet is sitting in the MTL TX FIFO, it
   triggers a common interrupt.
5) The ISR for this common interrupt reads the status register. Since
   the Link OK flag is set, the server assumes the link is up.
6) This schedules the link monitor timer again (as in step 2).
7) Since the Link OK flag is already set, the monitor timer exits
   without calling OSI_CMD_SET_SPEED (which is responsible for UPHY
   lane bring-up).
8) As a result, when the switch reset is released, the UPHY lane is not
   brought up, and the Ethernet link remains broken.

Fix:
Check the UPHY lane status before reporting link up: treat Link OK as
valid only when the lane status is enabled.
Bug 5206852 Change-Id: I98c49f710e2570480583c40255cda912af6ab1b5 Signed-off-by: Bhadram Varka Reviewed-on: https://git-master.nvidia.com/r/c/kernel/nvethernetrm/+/3339051 Reviewed-by: svcacv Reviewed-by: Hareesh Kesireddy Reviewed-by: svc-mobile-coverity Reviewed-by: Srinivas Ramachandran Reviewed-by: svc-mobile-cert --- osi/core/core_common.c | 4 ++++ osi/core/mgbe_core.c | 6 +++++- osi/core/xpcs.c | 6 ++---- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/osi/core/core_common.c b/osi/core/core_common.c index f4adb0a..e88f386 100644 --- a/osi/core/core_common.c +++ b/osi/core/core_common.c @@ -207,6 +207,7 @@ fail: nve32_t hw_set_speed(struct osi_core_priv_data *const osi_core, const nve32_t speed) { + struct core_local *l_core = (struct core_local *)(void *)osi_core; nveu32_t value; nve32_t ret = 0; void *base = osi_core->base; @@ -216,6 +217,8 @@ nve32_t hw_set_speed(struct osi_core_priv_data *const osi_core, const nve32_t sp MGBE_MAC_TMCR }; + l_core->lane_status = OSI_DISABLE; + if (((osi_core->mac == OSI_MAC_HW_EQOS) && (speed > OSI_SPEED_2500)) || (((osi_core->mac == OSI_MAC_HW_MGBE) || (osi_core->mac == OSI_MAC_HW_MGBE_T26X)) && @@ -310,6 +313,7 @@ nve32_t hw_set_speed(struct osi_core_priv_data *const osi_core, const nve32_t sp } } + l_core->lane_status = OSI_ENABLE; osi_core->speed = speed; fail: return ret; diff --git a/osi/core/mgbe_core.c b/osi/core/mgbe_core.c index 1a7daff..9fb4911 100644 --- a/osi/core/mgbe_core.c +++ b/osi/core/mgbe_core.c @@ -3154,6 +3154,7 @@ static inline nveu32_t get_free_ts_idx(struct core_local *l_core) static void mgbe_handle_link_change_and_fpe_intrs(struct osi_core_priv_data *osi_core, nveu32_t mac_isr) { + struct core_local *l_core = (struct core_local *)(void *)osi_core; nveu32_t mac_ier = 0; nveu8_t *base = (nveu8_t *)osi_core->base; nveu32_t value = 0U; @@ -3179,8 +3180,11 @@ static void mgbe_handle_link_change_and_fpe_intrs(struct osi_core_priv_data *osi value &= ~MGBE_IMR_RGSMIIIE; osi_writela(osi_core, 
value, (nveu8_t *)osi_core->base + MGBE_MAC_IER); + /* Mark that UPHY lane is down */ + l_core->lane_status = OSI_DISABLE; osi_core->osd_ops.restart_lane_bringup(osi_core->osd, OSI_DISABLE); - } else if ((mac_isr & MGBE_MAC_ISR_LS_MASK) == MGBE_MAC_ISR_LS_LINK_OK) { + } else if (((mac_isr & MGBE_MAC_ISR_LS_MASK) == MGBE_MAC_ISR_LS_LINK_OK) && + (l_core->lane_status == OSI_ENABLE)) { osi_core->osd_ops.restart_lane_bringup(osi_core->osd, OSI_ENABLE); #ifdef HSI_SUPPORT link_ok = 1; diff --git a/osi/core/xpcs.c b/osi/core/xpcs.c index 4d71bfe..b5d701c 100644 --- a/osi/core/xpcs.c +++ b/osi/core/xpcs.c @@ -763,14 +763,12 @@ step10: if (l_core->lane_status == OSI_ENABLE) { OSI_CORE_ERR(osi_core->osd, OSI_LOG_ARG_HW_FAIL, "Failed to get PCS block lock\n", 0ULL); - l_core->lane_status = OSI_DISABLE; } ret = -1; goto fail; } else { - OSI_CORE_INFO((osi_core->osd), (OSI_LOG_ARG_HW_FAIL), - ("PCS block lock SUCCESS\n"), (0ULL)); - l_core->lane_status = OSI_ENABLE; + OSI_CORE_INFO(osi_core->osd, OSI_LOG_ARG_HW_FAIL, + "PCS block lock SUCCESS\n", 0ULL); } fail: return ret;