DCE KMD: Add timeout into synchronous IPC calls

This is for avoiding kernel hang when DCE FW fails to respond.
Failed IPC calls return -ERESTARTSYS or -ETIMEDOUT, which
will be handled by caller functions:
1. tegra_dce_client_ipc_send_recv (EXPORT_SYMBOL)
   This is an exported module symbol, so callers are responsible
   for checking the return value.
2. DCE FSM event handler
   On an error return, the state changes back to the previous state.

DCE_IPC_TIMEOUT_MS_MAX is set to 10000 ms.
SHA computation time on an SC7 entry request can go up to 2 sec,
so the host tolerance time must be larger than this.

Jira TDS-16567
https://nvbugspro.nvidia.com/bug/5335034

Change-Id: I5d77a9497f14f305d07b98e39a58fbcecafedf92
Signed-off-by: charliej <charliej@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3358620
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Mahesh Kumar <mahkumar@nvidia.com>
Reviewed-by: svcacv <svcacv@nvidia.com>
Tested-by: Mahesh Kumar <mahkumar@nvidia.com>
Reviewed-by: Vinod Gopalakrishnakurup <vinodg@nvidia.com>
(cherry picked from commit 6c2ab3c78ce7cba0e88455b263d51d1a88c03927)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3402917
This commit is contained in:
charliej
2025-05-07 07:16:51 +00:00
committed by Jon Hunter
parent 9a57665a0f
commit 31cf676050
5 changed files with 27 additions and 28 deletions

View File

@@ -36,22 +36,18 @@
* *
* @d : Pointer to tegra_dce struct. * @d : Pointer to tegra_dce struct.
* *
* Return : 0 if successful * Return : 0 if successful, -ETIMEOUT if timeout, -ERESTARTSYS if interrupted by signal
*/ */
int dce_admin_ipc_wait(struct tegra_dce *d) int dce_admin_ipc_wait(struct tegra_dce *d)
{ {
int ret = 0; int ret = 0;
ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_ADMIN_IPC], true, 0); ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_ADMIN_IPC], true,
DCE_IPC_TIMEOUT_MS_MAX);
if (ret) { if (ret) {
/** dce_os_err(d, "Admin IPC wait, interrupted or timedout:%d", ret);
* TODO: Add error handling for abort and retry
*/
dce_os_err(d, "Admin IPC wait was interrupted with err:%d", ret);
goto out;
} }
out:
return ret; return ret;
} }
@@ -899,10 +895,10 @@ int dce_admin_send_enter_sc7(struct tegra_dce *d,
} }
/* Wait for SC7 Enter done */ /* Wait for SC7 Enter done */
ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_SC7_ENTER], true, 0); ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_SC7_ENTER], true,
DCE_IPC_TIMEOUT_MS_MAX);
if (ret) { if (ret) {
dce_os_err(d, "SC7 Enter wait was interrupted with err:%d", ret); dce_os_err(d, "SC7 Enter wait, interrupted or timedout:%d", ret);
goto out;
} }
out: out:

View File

@@ -124,12 +124,10 @@ int dce_handle_boot_complete_requested_event(struct tegra_dce *d, void *params)
dce_os_debug(d, "Waiting for dce fw to boot..."); dce_os_debug(d, "Waiting for dce fw to boot...");
ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_BOOT_COMPLETE], true, 0); ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_BOOT_COMPLETE], true,
0);
if (ret) { if (ret) {
/** dce_os_err(d, "dce boot wait, interrupted:%d", ret);
* TODO: Add error handling for abort and retry
*/
dce_os_err(d, "dce boot wait was interrupted with err:%d", ret);
} }
boot_done: boot_done:
@@ -388,13 +386,10 @@ static int dce_mailbox_wait_boot_interface(struct tegra_dce *d)
u32 status; u32 status;
int ret; int ret;
ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_BOOT_CMD], true, 0); ret = dce_wait_cond_wait_interruptible(d, &d->ipc_waits[DCE_WAIT_BOOT_CMD], true,
if (ret) { DCE_IPC_TIMEOUT_MS_MAX);
/** if (ret)
* TODO: Add error handling for abort and retry dce_os_err(d, "dce mbox wait was interrupted or timedout:%d", ret);
*/
dce_os_err(d, "dce mbox wait was interrupted with err:%d", ret);
}
status = dce_mailbox_get_interface_status(d, status = dce_mailbox_get_interface_status(d,
DCE_MAILBOX_BOOT_INTERFACE); DCE_MAILBOX_BOOT_INTERFACE);
@@ -405,8 +400,8 @@ static int dce_mailbox_wait_boot_interface(struct tegra_dce *d)
status); status);
return -EBADE; return -EBADE;
} }
/* if boot failure flag is not available, return ETIMEOUT or ERESTARTSYS */
return 0; return ret;
} }
/** /**

View File

@@ -369,7 +369,7 @@ int dce_client_ipc_wait(struct tegra_dce *d, u32 int_type)
} }
retry_wait: retry_wait:
ret = dce_wait_cond_wait_interruptible(d, &cl->recv_wait, true, 0); ret = dce_wait_cond_wait_interruptible(d, &cl->recv_wait, true, DCE_IPC_TIMEOUT_MS_MAX);
if (ret) { if (ret) {
if (ret == -ERESTARTSYS) { /* Interrupt. */ if (ret == -ERESTARTSYS) { /* Interrupt. */
dce_os_debug(d, "Client [%u] wait interrupted: retrying.", type); dce_os_debug(d, "Client [%u] wait interrupted: retrying.", type);

View File

@@ -114,6 +114,14 @@
#define DCE_WAIT_LOG 4 #define DCE_WAIT_LOG 4
#define DCE_MAX_WAIT 5 #define DCE_MAX_WAIT 5
/**
* DCE IPC timeout values.
* This is for avoiding kernel lockup due to infinite wait on ipc channel.
* Max SHA calculation time is 2sec upon SC7 entry request.
* Host wait time must be larger than this.
*/
#define DCE_IPC_TIMEOUT_MS_MAX 10000 /* Max timeout, 10 seconds */
struct tegra_dce; struct tegra_dce;
/** /**

View File

@@ -32,8 +32,8 @@ struct dce_os_cond {
condition, msecs_to_jiffies(timeout_ms)); \ condition, msecs_to_jiffies(timeout_ms)); \
if (_ret == 0) \ if (_ret == 0) \
ret = -ETIMEDOUT; \ ret = -ETIMEDOUT; \
else if (_ret == -ERESTARTSYS) \ else if (_ret < 0) \
ret = -ERESTARTSYS; \ ret = _ret; \
} else { \ } else { \
ret = wait_event_interruptible((c)->wq, condition); \ ret = wait_event_interruptible((c)->wq, condition); \
} \ } \