mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: wait for stalling interrupts to complete during TSG unbind preempt
Some engine stalling interrupts can block the context save off the engine if they are not handled during fifo.preempt_tsg. They need to be handled while polling for the engine ctxsw status.

Bug 200711183

Change-Id: I7418a9e0354013b81fbefd8c0cab5068404fc44e
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2521971
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
committed by
mobile promotions
parent
6672914980
commit
07d8a39647
@@ -42,6 +42,9 @@ u32 nvgpu_preempt_get_timeout(struct gk20a *g)
|
|||||||
int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
|
int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
u32 preempt_retry_count = 10U;
|
||||||
|
u32 preempt_retry_timeout =
|
||||||
|
nvgpu_preempt_get_timeout(g) / preempt_retry_count;
|
||||||
#ifdef CONFIG_NVGPU_LS_PMU
|
#ifdef CONFIG_NVGPU_LS_PMU
|
||||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||||
int mutex_ret = 0;
|
int mutex_ret = 0;
|
||||||
@@ -53,6 +56,7 @@ int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
nvgpu_mutex_acquire(&tsg->runlist->runlist_lock);
|
nvgpu_mutex_acquire(&tsg->runlist->runlist_lock);
|
||||||
|
|
||||||
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
|
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
|
||||||
@@ -64,20 +68,26 @@ int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
|
|||||||
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
||||||
PMU_MUTEX_ID_FIFO, &token);
|
PMU_MUTEX_ID_FIFO, &token);
|
||||||
#endif
|
#endif
|
||||||
nvgpu_log_fn(g, "preempt id: %d", tsg->tsgid);
|
|
||||||
|
|
||||||
g->ops.fifo.preempt_trigger(g, tsg->tsgid, ID_TYPE_TSG);
|
g->ops.fifo.preempt_trigger(g, tsg->tsgid, ID_TYPE_TSG);
|
||||||
|
|
||||||
/* poll for preempt done */
|
/*
|
||||||
ret = g->ops.fifo.is_preempt_pending(g, tsg->tsgid, ID_TYPE_TSG);
|
* Poll for preempt done. if stalling interrupts are pending
|
||||||
|
* while preempt is in progress we poll for stalling interrupts
|
||||||
|
* to finish based on return value from this function and
|
||||||
|
* retry preempt again.
|
||||||
|
* If HW is hung, on the last retry instance we try to identify
|
||||||
|
* the engines hung and set the runlist reset_eng_bitmask
|
||||||
|
* and mark preemption completion.
|
||||||
|
*/
|
||||||
|
ret = g->ops.fifo.is_preempt_pending(g, tsg->tsgid,
|
||||||
|
ID_TYPE_TSG, preempt_retry_count > 1U);
|
||||||
|
|
||||||
#ifdef CONFIG_NVGPU_LS_PMU
|
#ifdef CONFIG_NVGPU_LS_PMU
|
||||||
if (mutex_ret == 0) {
|
if (mutex_ret == 0) {
|
||||||
int err = nvgpu_pmu_lock_release(g, g->pmu, PMU_MUTEX_ID_FIFO,
|
int err = nvgpu_pmu_lock_release(g, g->pmu,
|
||||||
&token);
|
PMU_MUTEX_ID_FIFO, &token);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d",
|
nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d", err);
|
||||||
err);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -88,6 +98,16 @@ int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
|
|||||||
|
|
||||||
nvgpu_mutex_release(&tsg->runlist->runlist_lock);
|
nvgpu_mutex_release(&tsg->runlist->runlist_lock);
|
||||||
|
|
||||||
|
if (ret != -EAGAIN) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = nvgpu_wait_for_stall_interrupts(g, preempt_retry_timeout);
|
||||||
|
if (ret != 0) {
|
||||||
|
nvgpu_log_info(g, "wait for stall interrupts failed %d", ret);
|
||||||
|
}
|
||||||
|
} while (--preempt_retry_count != 0U);
|
||||||
|
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
if (nvgpu_platform_is_silicon(g)) {
|
if (nvgpu_platform_is_silicon(g)) {
|
||||||
nvgpu_err(g, "preempt timed out for tsgid: %u, "
|
nvgpu_err(g, "preempt timed out for tsgid: %u, "
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
@@ -62,11 +62,11 @@ static int gk20a_fifo_preempt_locked(struct gk20a *g, u32 id,
|
|||||||
g->ops.fifo.preempt_trigger(g, id, id_type);
|
g->ops.fifo.preempt_trigger(g, id, id_type);
|
||||||
|
|
||||||
/* wait for preempt */
|
/* wait for preempt */
|
||||||
return g->ops.fifo.is_preempt_pending(g, id, id_type);
|
return g->ops.fifo.is_preempt_pending(g, id, id_type, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
|
int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
|
||||||
unsigned int id_type)
|
unsigned int id_type, bool preempt_retries_left)
|
||||||
{
|
{
|
||||||
struct nvgpu_timeout timeout;
|
struct nvgpu_timeout timeout;
|
||||||
u32 delay = POLL_DELAY_MIN_US;
|
u32 delay = POLL_DELAY_MIN_US;
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
@@ -32,6 +32,6 @@ void gk20a_fifo_preempt_trigger(struct gk20a *g, u32 id, unsigned int id_type);
|
|||||||
int gk20a_fifo_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch);
|
int gk20a_fifo_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch);
|
||||||
int gk20a_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg);
|
int gk20a_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg);
|
||||||
int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
|
int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
|
||||||
unsigned int id_type);
|
unsigned int id_type, bool preempt_retries_left);
|
||||||
|
|
||||||
#endif /* FIFO_PREEMPT_GK20A_H */
|
#endif /* FIFO_PREEMPT_GK20A_H */
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
@@ -33,7 +33,7 @@ struct nvgpu_tsg;
|
|||||||
void gv11b_fifo_preempt_trigger(struct gk20a *g, u32 id, unsigned int id_type);
|
void gv11b_fifo_preempt_trigger(struct gk20a *g, u32 id, unsigned int id_type);
|
||||||
int gv11b_fifo_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch);
|
int gv11b_fifo_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch);
|
||||||
int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
|
int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
|
||||||
unsigned int id_type);
|
unsigned int id_type, bool preempt_retries_left);
|
||||||
int gv11b_fifo_preempt_poll_pbdma(struct gk20a *g, u32 tsgid, u32 pbdma_id);
|
int gv11b_fifo_preempt_poll_pbdma(struct gk20a *g, u32 tsgid, u32 pbdma_id);
|
||||||
|
|
||||||
#endif /* FIFO_PREEMPT_GV11B_H */
|
#endif /* FIFO_PREEMPT_GV11B_H */
|
||||||
|
|||||||
@@ -163,16 +163,16 @@ int gv11b_fifo_preempt_poll_pbdma(struct gk20a *g, u32 tsgid,
|
|||||||
static int gv11b_fifo_check_eng_intr_pending(struct gk20a *g, u32 id,
|
static int gv11b_fifo_check_eng_intr_pending(struct gk20a *g, u32 id,
|
||||||
struct nvgpu_engine_status_info *engine_status,
|
struct nvgpu_engine_status_info *engine_status,
|
||||||
u32 eng_intr_pending,
|
u32 eng_intr_pending,
|
||||||
u32 engine_id, u32 *reset_eng_bitmask)
|
u32 engine_id, u32 *reset_eng_bitmask,
|
||||||
|
bool preempt_retries_left)
|
||||||
{
|
{
|
||||||
|
bool check_preempt_retry = false;
|
||||||
int ret = -EBUSY;
|
int ret = -EBUSY;
|
||||||
|
|
||||||
if (engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SWITCH) {
|
if (engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SWITCH) {
|
||||||
/* Eng save hasn't started yet. Continue polling */
|
/* Eng save hasn't started yet. Continue polling */
|
||||||
if (eng_intr_pending != 0U) {
|
if (eng_intr_pending != 0U) {
|
||||||
/* if eng intr, stop polling */
|
check_preempt_retry = true;
|
||||||
*reset_eng_bitmask |= BIT32(engine_id);
|
|
||||||
ret = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ((engine_status->ctxsw_status == NVGPU_CTX_STATUS_VALID) ||
|
} else if ((engine_status->ctxsw_status == NVGPU_CTX_STATUS_VALID) ||
|
||||||
@@ -180,9 +180,7 @@ static int gv11b_fifo_check_eng_intr_pending(struct gk20a *g, u32 id,
|
|||||||
|
|
||||||
if (id == engine_status->ctx_id) {
|
if (id == engine_status->ctx_id) {
|
||||||
if (eng_intr_pending != 0U) {
|
if (eng_intr_pending != 0U) {
|
||||||
/* preemption will not finish */
|
check_preempt_retry = true;
|
||||||
*reset_eng_bitmask |= BIT32(engine_id);
|
|
||||||
ret = 0;
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* context is not running on the engine */
|
/* context is not running on the engine */
|
||||||
@@ -193,24 +191,38 @@ static int gv11b_fifo_check_eng_intr_pending(struct gk20a *g, u32 id,
|
|||||||
|
|
||||||
if (id == engine_status->ctx_next_id) {
|
if (id == engine_status->ctx_next_id) {
|
||||||
if (eng_intr_pending != 0U) {
|
if (eng_intr_pending != 0U) {
|
||||||
/* preemption will not finish */
|
check_preempt_retry = true;
|
||||||
*reset_eng_bitmask |= BIT32(engine_id);
|
|
||||||
ret = 0;
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* context is not running on the engine */
|
/* context is not running on the engine */
|
||||||
ret = 0;
|
ret = 0;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
if (eng_intr_pending != 0U) {
|
||||||
|
check_preempt_retry = true;
|
||||||
} else {
|
} else {
|
||||||
/* Preempt should be finished */
|
/* Preempt should be finished */
|
||||||
ret = 0;
|
ret = 0;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if eng intr, stop polling and check if we can retry preempts. */
|
||||||
|
if (check_preempt_retry) {
|
||||||
|
if (preempt_retries_left) {
|
||||||
|
ret = -EAGAIN;
|
||||||
|
} else {
|
||||||
|
/* preemption will not finish */
|
||||||
|
*reset_eng_bitmask |= BIT32(engine_id);
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gv11b_fifo_preempt_poll_eng(struct gk20a *g, u32 id,
|
static int gv11b_fifo_preempt_poll_eng(struct gk20a *g, u32 id,
|
||||||
u32 engine_id, u32 *reset_eng_bitmask)
|
u32 engine_id, u32 *reset_eng_bitmask,
|
||||||
|
bool preempt_retries_left)
|
||||||
{
|
{
|
||||||
struct nvgpu_timeout timeout;
|
struct nvgpu_timeout timeout;
|
||||||
u32 delay = POLL_DELAY_MIN_US;
|
u32 delay = POLL_DELAY_MIN_US;
|
||||||
@@ -283,8 +295,8 @@ static int gv11b_fifo_preempt_poll_eng(struct gk20a *g, u32 id,
|
|||||||
}
|
}
|
||||||
ret = gv11b_fifo_check_eng_intr_pending(g, id, &engine_status,
|
ret = gv11b_fifo_check_eng_intr_pending(g, id, &engine_status,
|
||||||
eng_intr_pending, engine_id,
|
eng_intr_pending, engine_id,
|
||||||
reset_eng_bitmask);
|
reset_eng_bitmask, preempt_retries_left);
|
||||||
if (ret == 0) {
|
if (ret == 0 || ret == -EAGAIN) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -292,7 +304,7 @@ static int gv11b_fifo_preempt_poll_eng(struct gk20a *g, u32 id,
|
|||||||
delay = min_t(u32, delay << 1U, POLL_DELAY_MAX_US);
|
delay = min_t(u32, delay << 1U, POLL_DELAY_MAX_US);
|
||||||
} while (nvgpu_timeout_expired(&timeout) == 0);
|
} while (nvgpu_timeout_expired(&timeout) == 0);
|
||||||
|
|
||||||
if (ret != 0) {
|
if (ret != 0 && ret != -EAGAIN) {
|
||||||
/*
|
/*
|
||||||
* The reasons a preempt can fail are:
|
* The reasons a preempt can fail are:
|
||||||
* 1.Some other stalling interrupt is asserted preventing
|
* 1.Some other stalling interrupt is asserted preventing
|
||||||
@@ -309,7 +321,7 @@ static int gv11b_fifo_preempt_poll_eng(struct gk20a *g, u32 id,
|
|||||||
}
|
}
|
||||||
|
|
||||||
int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
|
int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
|
||||||
unsigned int id_type)
|
unsigned int id_type, bool preempt_retries_left)
|
||||||
{
|
{
|
||||||
struct nvgpu_fifo *f = &g->fifo;
|
struct nvgpu_fifo *f = &g->fifo;
|
||||||
struct nvgpu_runlist *rl;
|
struct nvgpu_runlist *rl;
|
||||||
@@ -350,7 +362,7 @@ int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
|
|||||||
engine_id = U32(bit);
|
engine_id = U32(bit);
|
||||||
err = gv11b_fifo_preempt_poll_eng(g,
|
err = gv11b_fifo_preempt_poll_eng(g,
|
||||||
tsgid, engine_id,
|
tsgid, engine_id,
|
||||||
&rl->reset_eng_bitmask);
|
&rl->reset_eng_bitmask, preempt_retries_left);
|
||||||
if ((err != 0) && (ret == 0)) {
|
if ((err != 0) && (ret == 0)) {
|
||||||
ret = err;
|
ret = err;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ int gk20a_fifo_reschedule_preempt_next(struct nvgpu_channel *ch,
|
|||||||
#endif
|
#endif
|
||||||
if (wait_preempt) {
|
if (wait_preempt) {
|
||||||
if (g->ops.fifo.is_preempt_pending(g, preempt_id,
|
if (g->ops.fifo.is_preempt_pending(g, preempt_id,
|
||||||
preempt_type) != 0) {
|
preempt_type, false) != 0) {
|
||||||
nvgpu_err(g, "fifo preempt timed out");
|
nvgpu_err(g, "fifo preempt timed out");
|
||||||
/*
|
/*
|
||||||
* This function does not care if preempt
|
* This function does not care if preempt
|
||||||
|
|||||||
@@ -177,7 +177,7 @@ struct gops_fifo {
|
|||||||
int (*preempt_poll_pbdma)(struct gk20a *g, u32 tsgid,
|
int (*preempt_poll_pbdma)(struct gk20a *g, u32 tsgid,
|
||||||
u32 pbdma_id);
|
u32 pbdma_id);
|
||||||
int (*is_preempt_pending)(struct gk20a *g, u32 id,
|
int (*is_preempt_pending)(struct gk20a *g, u32 id,
|
||||||
unsigned int id_type);
|
unsigned int id_type, bool preempt_retries_left);
|
||||||
void (*intr_set_recover_mask)(struct gk20a *g);
|
void (*intr_set_recover_mask)(struct gk20a *g);
|
||||||
void (*intr_unset_recover_mask)(struct gk20a *g);
|
void (*intr_unset_recover_mask)(struct gk20a *g);
|
||||||
void (*intr_top_enable)(struct gk20a *g, bool enable);
|
void (*intr_top_enable)(struct gk20a *g, bool enable);
|
||||||
|
|||||||
@@ -205,13 +205,15 @@ static void stub_fifo_preempt_trigger(struct gk20a *g, u32 id,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int stub_fifo_is_preempt_pending_ebusy(struct gk20a *g, u32 id,
|
static int stub_fifo_is_preempt_pending_ebusy(struct gk20a *g, u32 id,
|
||||||
unsigned int id_type)
|
unsigned int id_type,
|
||||||
|
bool preempt_retries_left)
|
||||||
{
|
{
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int stub_fifo_is_preempt_pending_pass(struct gk20a *g, u32 id,
|
static int stub_fifo_is_preempt_pending_pass(struct gk20a *g, u32 id,
|
||||||
unsigned int id_type)
|
unsigned int id_type,
|
||||||
|
bool preempt_retries_left)
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -463,7 +465,7 @@ int test_gv11b_fifo_is_preempt_pending(struct unit_module *m, struct gk20a *g,
|
|||||||
/* Modify eng_stat for engine 0 */
|
/* Modify eng_stat for engine 0 */
|
||||||
nvgpu_writel(g, fifo_engine_status_r(0U), stub.eng_stat);
|
nvgpu_writel(g, fifo_engine_status_r(0U), stub.eng_stat);
|
||||||
|
|
||||||
err = gv11b_fifo_is_preempt_pending(g, 0U, id_type);
|
err = gv11b_fifo_is_preempt_pending(g, 0U, id_type, false);
|
||||||
|
|
||||||
if (branches & F_PREEMPT_PENDING_POLL_PBDMA_FAIL) {
|
if (branches & F_PREEMPT_PENDING_POLL_PBDMA_FAIL) {
|
||||||
unit_assert(err == -ETIMEDOUT, goto done);
|
unit_assert(err == -ETIMEDOUT, goto done);
|
||||||
|
|||||||
Reference in New Issue
Block a user