From 31cf6760501aed8748d8c14622109a0736eac38f Mon Sep 17 00:00:00 2001 From: charliej Date: Wed, 7 May 2025 07:16:51 +0000 Subject: [PATCH] DCE KMD: Add timeout into synchronous IPC calls This avoids a kernel hang when the DCE FW fails to respond. A failed IPC call will return -ERESTARTSYS or -ETIMEDOUT, which will be handled by the caller functions: 1. tegra_dce_client_ipc_send_recv (EXPORT_SYMBOL) This is a module-exported symbol, and callers are responsible for checking the return value. 2. DCE FSM event handler On an error return, the state will change back to the previous state. DCE_IPC_TIMEOUT_MS_MAX is set to 10000 ms. SHA computation time on an SC7 entry request can go up to 2 sec, so the host tolerance time must be larger than this. Jira TDS-16567 https://nvbugspro.nvidia.com/bug/5335034 Change-Id: I5d77a9497f14f305d07b98e39a58fbcecafedf92 Signed-off-by: charliej Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3358620 GVS: buildbot_gerritrpt Reviewed-by: Mahesh Kumar Reviewed-by: svcacv Tested-by: Mahesh Kumar Reviewed-by: Vinod Gopalakrishnakurup (cherry picked from commit 6c2ab3c78ce7cba0e88455b263d51d1a88c03927) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3402917 --- drivers/platform/tegra/dce/dce-admin.c | 18 ++++++--------- drivers/platform/tegra/dce/dce-bootstrap.c | 23 ++++++++----------- drivers/platform/tegra/dce/dce-client-ipc.c | 2 +- drivers/platform/tegra/dce/include/dce.h | 8 +++++++ .../os/linux/include/dce-os-cond-internal.h | 4 ++-- 5 files changed, 27 insertions(+), 28 deletions(-) diff --git a/drivers/platform/tegra/dce/dce-admin.c b/drivers/platform/tegra/dce/dce-admin.c index f034ebf0..562b0bbe 100644 --- a/drivers/platform/tegra/dce/dce-admin.c +++ b/drivers/platform/tegra/dce/dce-admin.c @@ -36,22 +36,18 @@ * * @d : Pointer to tegra_dce struct.
* - * Return : 0 if successful + * Return : 0 if successful, -ETIMEDOUT if timeout, -ERESTARTSYS if interrupted by signal */ int dce_admin_ipc_wait(struct tegra_dce *d) { int ret = 0; - ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_ADMIN_IPC], true, 0); + ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_ADMIN_IPC], true, + DCE_IPC_TIMEOUT_MS_MAX); if (ret) { - /** - * TODO: Add error handling for abort and retry - */ - dce_os_err(d, "Admin IPC wait was interrupted with err:%d", ret); - goto out; + dce_os_err(d, "Admin IPC wait, interrupted or timedout:%d", ret); } -out: return ret; } @@ -899,10 +895,10 @@ int dce_admin_send_enter_sc7(struct tegra_dce *d, } /* Wait for SC7 Enter done */ - ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_SC7_ENTER], true, 0); + ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_SC7_ENTER], true, + DCE_IPC_TIMEOUT_MS_MAX); if (ret) { - dce_os_err(d, "SC7 Enter wait was interrupted with err:%d", ret); - goto out; + dce_os_err(d, "SC7 Enter wait, interrupted or timedout:%d", ret); } out: diff --git a/drivers/platform/tegra/dce/dce-bootstrap.c b/drivers/platform/tegra/dce/dce-bootstrap.c index f776f575..f25a26b4 100644 --- a/drivers/platform/tegra/dce/dce-bootstrap.c +++ b/drivers/platform/tegra/dce/dce-bootstrap.c @@ -124,12 +124,10 @@ int dce_handle_boot_complete_requested_event(struct tegra_dce *d, void *params) dce_os_debug(d, "Waiting for dce fw to boot..."); - ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_BOOT_COMPLETE], true, 0); + ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_BOOT_COMPLETE], true, + 0); if (ret) { - /** - * TODO: Add error handling for abort and retry - */ - dce_os_err(d, "dce boot wait was interrupted with err:%d", ret); + dce_os_err(d, "dce boot wait, interrupted:%d", ret); } boot_done: @@ -388,13 +386,10 @@ static int dce_mailbox_wait_boot_interface(struct tegra_dce *d) u32 status; int ret; - ret = 
dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_BOOT_CMD], true, 0); - if (ret) { - /** - * TODO: Add error handling for abort and retry - */ - dce_os_err(d, "dce mbox wait was interrupted with err:%d", ret); - } + ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_BOOT_CMD], true, + DCE_IPC_TIMEOUT_MS_MAX); + if (ret) + dce_os_err(d, "dce mbox wait was interrupted or timedout:%d", ret); status = dce_mailbox_get_interface_status(d, DCE_MAILBOX_BOOT_INTERFACE); @@ -405,8 +400,8 @@ static int dce_mailbox_wait_boot_interface(struct tegra_dce *d) status); return -EBADE; } - - return 0; + /* if boot failure flag is not available, return ETIMEDOUT or ERESTARTSYS */ + return ret; } /** diff --git a/drivers/platform/tegra/dce/dce-client-ipc.c b/drivers/platform/tegra/dce/dce-client-ipc.c index 0205d0a2..1c480953 100644 --- a/drivers/platform/tegra/dce/dce-client-ipc.c +++ b/drivers/platform/tegra/dce/dce-client-ipc.c @@ -369,7 +369,7 @@ int dce_client_ipc_wait(struct tegra_dce *d, u32 int_type) } retry_wait: - ret = dce_wait_cond_wait_interruptible(d, &cl->recv_wait, true, 0); + ret = dce_wait_cond_wait_interruptible(d, &cl->recv_wait, true, DCE_IPC_TIMEOUT_MS_MAX); if (ret) { if (ret == -ERESTARTSYS) { /* Interrupt. */ dce_os_debug(d, "Client [%u] wait interrupted: retrying.", type); diff --git a/drivers/platform/tegra/dce/include/dce.h b/drivers/platform/tegra/dce/include/dce.h index 563f3105..c8c9dcb2 100644 --- a/drivers/platform/tegra/dce/include/dce.h +++ b/drivers/platform/tegra/dce/include/dce.h @@ -114,6 +114,14 @@ #define DCE_WAIT_LOG 4 #define DCE_MAX_WAIT 5 +/** + * DCE IPC timeout values. + * This is for avoiding kernel lockup due to infinite wait on ipc channel. + * Max SHA calculation time is 2sec upon SC7 entry request. + * Host wait time must be larger than this. 
+ */ +#define DCE_IPC_TIMEOUT_MS_MAX 10000 /* Max timeout, 10 seconds */ + struct tegra_dce; /** diff --git a/drivers/platform/tegra/dce/os/linux/include/dce-os-cond-internal.h b/drivers/platform/tegra/dce/os/linux/include/dce-os-cond-internal.h index cc146f21..06fe78a9 100644 --- a/drivers/platform/tegra/dce/os/linux/include/dce-os-cond-internal.h +++ b/drivers/platform/tegra/dce/os/linux/include/dce-os-cond-internal.h @@ -32,8 +32,8 @@ struct dce_os_cond { condition, msecs_to_jiffies(timeout_ms)); \ if (_ret == 0) \ ret = -ETIMEDOUT; \ - else if (_ret == -ERESTARTSYS) \ - ret = -ERESTARTSYS; \ + else if (_ret < 0) \ + ret = _ret; \ } else { \ ret = wait_event_interruptible((c)->wq, condition); \ } \