diff --git a/drivers/gpu/nvgpu/common/fifo/preempt.c b/drivers/gpu/nvgpu/common/fifo/preempt.c index 6f3433b42..c6f8a0e8c 100644 --- a/drivers/gpu/nvgpu/common/fifo/preempt.c +++ b/drivers/gpu/nvgpu/common/fifo/preempt.c @@ -112,7 +112,7 @@ int nvgpu_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch) } /* called from rc */ -void nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g, +int nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g, struct nvgpu_tsg *tsg) { struct nvgpu_fifo *f = &g->fifo; @@ -122,11 +122,7 @@ void nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g, u32 tsgid, pbdma_id; if (g->ops.fifo.preempt_poll_pbdma == NULL) { - return; - } - - if (tsg == NULL) { - return; + return 0; } tsgid = tsg->tsgid; @@ -142,12 +138,11 @@ void nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g, * memory system would be blocked. */ if (g->ops.fifo.preempt_poll_pbdma(g, tsgid, pbdma_id) != 0) { - nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, - pbdma_id, - GPU_HOST_PBDMA_PREEMPT_ERROR, 0); nvgpu_err(g, "PBDMA preempt failed"); + return -EBUSY; } } + return 0; } /* diff --git a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c index 2800c54f5..e19d94ede 100644 --- a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c +++ b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c @@ -41,6 +41,7 @@ #ifdef CONFIG_NVGPU_LS_PMU #include #endif +#include #include "rc_gv11b.h" @@ -223,8 +224,12 @@ void gv11b_fifo_recover(struct gk20a *g, u32 act_eng_bitmask, * For each PBDMA which serves the runlist, poll to verify the TSG is no * longer on the PBDMA and the engine phase of the preempt has started. */ - if (tsg != NULL) { - nvgpu_preempt_poll_tsg_on_pbdma(g, tsg); + if (tsg != NULL && (nvgpu_preempt_poll_tsg_on_pbdma(g, tsg) != 0)) { + nvgpu_err(g, "TSG preemption on PBDMA failed; " + "PBDMA seems stuck; cannot recover stuck PBDMA."); + /* Trigger Quiesce as recovery failed on hung PBDMA. */ + nvgpu_sw_quiesce(g); + return; } #ifdef CONFIG_NVGPU_DEBUGGER diff --git a/drivers/gpu/nvgpu/include/nvgpu/preempt.h b/drivers/gpu/nvgpu/include/nvgpu/preempt.h index f848a6f4f..b1511a827 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/preempt.h +++ b/drivers/gpu/nvgpu/include/nvgpu/preempt.h @@ -70,7 +70,7 @@ int nvgpu_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch); * Called from recovery handling for volta onwards. This will * not be part of safety build after recovery is not supported in safety build. */ -void nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g, +int nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g, struct nvgpu_tsg *tsg); /** * @brief Preempt a set of runlists. diff --git a/userspace/units/fifo/preempt/nvgpu-preempt.c b/userspace/units/fifo/preempt/nvgpu-preempt.c index d2662b98a..1fae89ad7 100644 --- a/userspace/units/fifo/preempt/nvgpu-preempt.c +++ b/userspace/units/fifo/preempt/nvgpu-preempt.c @@ -162,13 +162,11 @@ done: } #define F_PREEMPT_POLL_PBDMA_NULL BIT(0) -#define F_PREEMPT_POLL_TSG_NULL BIT(1) -#define F_PREEMPT_POLL_PBDMA_BUSY BIT(2) -#define F_PREEMPT_POLL_LAST BIT(3) +#define F_PREEMPT_POLL_PBDMA_BUSY BIT(1) +#define F_PREEMPT_POLL_LAST BIT(2) static const char *f_preempt_poll[] = { "preempt_poll_pbdma_null", - "tsg_null", "preempt_poll_pbdma_busy", }; @@ -197,7 +195,7 @@ int test_preempt_poll_tsg_on_pbdma(struct unit_module *m, struct gk20a *g, u32 branches = 0U; int ret = UNIT_FAIL; - u32 prune = F_PREEMPT_POLL_PBDMA_NULL | F_PREEMPT_POLL_TSG_NULL; + u32 prune = F_PREEMPT_POLL_PBDMA_NULL; tsg = nvgpu_tsg_open(g, getpid()); unit_assert(tsg != NULL, goto done); @@ -221,14 +219,11 @@ int test_preempt_poll_tsg_on_pbdma(struct unit_module *m, struct gk20a *g, stub_fifo_preempt_poll_pbdma_busy : stub_fifo_preempt_poll_pbdma); - if (branches & F_PREEMPT_POLL_TSG_NULL) { - nvgpu_preempt_poll_tsg_on_pbdma(g, NULL); - } else { - nvgpu_preempt_poll_tsg_on_pbdma(g, tsg); - } + nvgpu_preempt_poll_tsg_on_pbdma(g, tsg); - if (branches & F_PREEMPT_POLL_TSG_NULL) { - unit_assert(stub[0].tsgid == NVGPU_INVALID_TSG_ID, + if (branches & F_PREEMPT_POLL_PBDMA_BUSY) { + unit_assert(stub[0].pbdma_id != + nvgpu_ffs(f->runlist_info[0]->pbdma_bitmask), goto done); } else if (!(branches & F_PREEMPT_POLL_PBDMA_NULL)) { unit_assert(stub[0].tsgid == 0, goto done);