mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: non abortable TSG for vidmem-clear
When an engine faults due to an unbound instance block, all active TSGs are currently aborted. This includes the TSG used by the vidmem-clear task to clear vidmem buffers. From this point on, nvgpu_vidmem_clear cannot submit jobs anymore.

Define the TSG in the MM CE context as non-abortable, and skip it when aborting active TSGs.

Bug 2486146

Change-Id: I221259aec468e8ee3a24e80fab8d8fb7ee8607b0
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2008954
(cherry picked from commit 6f2444dc5e128aa2b870796bd1e9dee7853f90af)
Reviewed-on: https://git-master.nvidia.com/r/2008942
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
bccb49d8fb
commit
13afcc24c3
@@ -228,6 +228,7 @@ int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid)
|
||||
|
||||
tsg->in_use = false;
|
||||
tsg->tsgid = tsgid;
|
||||
tsg->abortable = true;
|
||||
|
||||
nvgpu_init_list_node(&tsg->ch_list);
|
||||
nvgpu_rwsem_init(&tsg->ch_list_lock);
|
||||
|
||||
@@ -128,6 +128,7 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
|
||||
struct nvgpu_list_node *list = &ce_ctx->list;
|
||||
|
||||
ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED;
|
||||
ce_ctx->tsg->abortable = true;
|
||||
|
||||
nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
|
||||
|
||||
@@ -478,6 +479,9 @@ u32 gk20a_ce_create_context(struct gk20a *g,
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* this TSG should never be aborted */
|
||||
ce_ctx->tsg->abortable = false;
|
||||
|
||||
/* always kernel client needs privileged channel */
|
||||
ce_ctx->ch = gk20a_open_new_channel(g, runlist_id, true,
|
||||
nvgpu_current_pid(g), nvgpu_current_tid(g));
|
||||
|
||||
@@ -1232,6 +1232,8 @@ void gk20a_fifo_abort_tsg(struct gk20a *g, struct tsg_gk20a *tsg, bool preempt)
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
WARN_ON(tsg->abortable == false);
|
||||
|
||||
g->ops.fifo.disable_tsg(tsg);
|
||||
|
||||
if (preempt) {
|
||||
|
||||
@@ -944,8 +944,16 @@ static void gv11b_fifo_locked_abort_runlist_active_tsgs(struct gk20a *g,
|
||||
|
||||
for_each_set_bit(tsgid, runlist->active_tsgs,
|
||||
g->fifo.num_channels) {
|
||||
nvgpu_log(g, gpu_dbg_info, "abort tsg id %lu", tsgid);
|
||||
tsg = &g->fifo.tsg[tsgid];
|
||||
|
||||
if (!tsg->abortable) {
|
||||
nvgpu_log(g, gpu_dbg_info,
|
||||
"tsg %lu is not abortable, skipping",
|
||||
tsgid);
|
||||
continue;
|
||||
}
|
||||
nvgpu_log(g, gpu_dbg_info, "abort tsg id %lu", tsgid);
|
||||
|
||||
gk20a_disable_tsg(tsg);
|
||||
|
||||
/* assume all pbdma and eng faulted are set */
|
||||
|
||||
@@ -71,6 +71,7 @@ struct tsg_gk20a {
|
||||
u8 tpc_pg_enabled;
|
||||
bool tpc_num_initialized;
|
||||
bool in_use;
|
||||
bool abortable;
|
||||
|
||||
struct nvgpu_tsg_sm_error_state *sm_error_states;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user