gpu: nvgpu: acquire tsg ctx_init_lock when changing ctx state

The GR context associated with a channel is updated in various driver
paths. The sequence for doing so is: disable the TSG, preempt the TSG,
update the GR context or instance block, and then enable the TSG.
These operations, and runlist updates for the channel, have to be done
under the TSG-specific ctx_init_lock to avoid races.

suspend_contexts and resume_contexts need special handling, which is
not covered in this patch.

Bug 3677982

Change-Id: I837257fe9d9ef3eb6f69f5d7e0707e0bb6d4ea72
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2720222
Reviewed-by: Scott Long <scottl@nvidia.com>
Reviewed-by: Ankur Kishore <ankkishore@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
Sagar Kamble
2022-05-30 11:17:17 +05:30
committed by mobile promotions
parent ef99d9f010
commit f1896e0a64
5 changed files with 66 additions and 14 deletions

View File

@@ -1963,27 +1963,33 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context(struct dbg_session_gk20a *dbg_s,
return -EINVAL;
}
nvgpu_mutex_acquire(&tsg->ctx_init_lock);
ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
nvgpu_err(g, "invalid context mem");
return -EINVAL;
err = -EINVAL;
goto out;
}
if (ctx_mem->size > (u64)UINT_MAX) {
nvgpu_err(ch->g, "ctx size is larger than expected");
return -EINVAL;
err = -EINVAL;
goto out;
}
/* Check if the input buffer size equals the gr context size */
size = (u32)ctx_mem->size;
if (args->size != size) {
nvgpu_err(g, "size mismatch: %d != %d", args->size, size);
return -EINVAL;
err = -EINVAL;
goto out;
}
if (nvgpu_channel_disable_tsg(g, ch) != 0) {
nvgpu_err(g, "failed to disable channel/TSG");
return -EINVAL;
err = -EINVAL;
goto out;
}
err = nvgpu_preempt_channel(g, ch);
@@ -1998,9 +2004,13 @@ done:
enable_err = nvgpu_channel_enable_tsg(g, ch);
if (enable_err != 0) {
nvgpu_err(g, "failed to re-enable channel/TSG");
nvgpu_mutex_release(&tsg->ctx_init_lock);
return (err != 0) ? err : enable_err;
}
out:
nvgpu_mutex_release(&tsg->ctx_init_lock);
return err;
}