gpu: nvgpu: multiple address spaces support for subcontexts

This patch introduces the following relationships among various nvgpu
objects to support multiple address spaces with subcontexts. The
IOCTLs that establish each relationship are shown in parentheses.

nvgpu_tsg             1<---->n nvgpu_tsg_subctx (TSG_BIND_CHANNEL_EX)
nvgpu_tsg             1<---->n nvgpu_gr_ctx_mappings (ALLOC_OBJ_CTX)

nvgpu_tsg_subctx      1<---->1 nvgpu_gr_subctx (ALLOC_OBJ_CTX)
nvgpu_tsg_subctx      1<---->n nvgpu_channel (TSG_BIND_CHANNEL_EX)

nvgpu_gr_ctx_mappings 1<---->n nvgpu_gr_subctx (ALLOC_OBJ_CTX)
nvgpu_gr_ctx_mappings 1<---->1 vm_gk20a (ALLOC_OBJ_CTX)

On unbinding a channel, these objects are deleted according to their
dependencies (see the sketch below).
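
As a rough picture of this ownership graph, here is a minimal C sketch;
the fixed-size arrays stand in for the driver's list bookkeeping, and
none of these layouts are the actual driver definitions:

  #include <stddef.h>

  #define MAX_SUBCTXS  64  /* one per VEID; illustrative bound */
  #define MAX_VMS       8  /* VMs bound to one TSG; illustrative bound */

  struct vm_gk20a;         /* GPU address space */
  struct nvgpu_channel;

  /* Per-VM mappings of the TSG's gr_ctx buffers (1:1 with a vm_gk20a). */
  struct nvgpu_gr_ctx_mappings {
          struct vm_gk20a *vm;
          int refs;        /* gr_subctx objects sharing this VM's mappings */
  };

  /* GR state of one subcontext; shares its VM's mappings object (n:1). */
  struct nvgpu_gr_subctx {
          struct nvgpu_gr_ctx_mappings *mappings;
  };

  /* One TSG subcontext (VEID): one gr_subctx, many bound channels. */
  struct nvgpu_tsg_subctx {
          struct nvgpu_gr_subctx *gr_subctx;
          struct nvgpu_channel *channels[8];
          int nr_channels;
  };

  /* A TSG fans out to subcontexts and to one mappings object per VM. */
  struct nvgpu_tsg {
          struct nvgpu_tsg_subctx *subctxs[MAX_SUBCTXS];
          struct nvgpu_gr_ctx_mappings *mappings[MAX_VMS];
  };

Unbinding the last channel of a subcontext deletes the tsg_subctx, then
its gr_subctx, and, once no other gr_subctx shares it, the mappings
object along with its VM reference.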

Without subcontexts, gr_ctx buffer mappings are maintained in
struct nvgpu_gr_ctx. With subcontexts, they are maintained per
subcontext in struct nvgpu_gr_subctx.
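
In terms of the illustrative types above, the placement rule could be
expressed as:

  struct nvgpu_gr_ctx_mappings *
  gr_ctx_buffer_mappings(struct nvgpu_tsg *tsg,
                         struct nvgpu_tsg_subctx *subctx)
  {
          /* With subcontexts, mappings hang off the channel's gr_subctx;
           * otherwise off the TSG-wide gr_ctx (modeled here as the TSG's
           * single mappings slot). */
          if (subctx != NULL) {
                  return subctx->gr_subctx->mappings;
          }
          return tsg->mappings[0];
  }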

The preemption buffer (index NVGPU_GR_CTX_PREEMPT_CTXSW) and the PM
buffer (index NVGPU_GR_CTX_PM_CTX) are mapped in all subcontexts when
they are programmed from their respective IOCTLs.
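
With the same illustrative types, the all-subcontexts walk could look
like the sketch below; map_ctx_buffer() is a hypothetical helper, not a
driver API:

  int map_ctx_buffer(struct nvgpu_gr_ctx_mappings *m, unsigned int idx);

  int map_buffer_in_all_subctxs(struct nvgpu_tsg *tsg, unsigned int idx)
  {
          size_t i;

          /* idx is NVGPU_GR_CTX_PREEMPT_CTXSW or NVGPU_GR_CTX_PM_CTX. */
          for (i = 0; i < MAX_SUBCTXS; i++) {
                  struct nvgpu_tsg_subctx *s = tsg->subctxs[i];
                  int err;

                  if (s == NULL || s->gr_subctx == NULL) {
                          continue;  /* VEID not in use */
                  }

                  err = map_ctx_buffer(s->gr_subctx->mappings, idx);
                  if (err != 0) {
                          return err;
                  }
          }
          return 0;
  }

In the driver this walk runs with the TSG ctx_init_lock held, as noted
below.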

Global GR context buffers are programmed only for VEID0. Based on the
channel object class, the state is patched into the patch buffer on
every ALLOC_OBJ_CTX call, instead of only for the first channel as
before.
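
A condensed sketch of that per-call decision; both helper names here
are hypothetical:

  int commit_global_ctx_buffers(struct nvgpu_tsg_subctx *s);
  int patch_state_for_class(struct nvgpu_tsg_subctx *s,
                            unsigned int class_num);

  int alloc_obj_ctx_program(struct nvgpu_tsg_subctx *s, unsigned int veid,
                            unsigned int class_num)
  {
          int err = 0;

          /* Global GR context buffers: committed for VEID0 only. */
          if (veid == 0U) {
                  err = commit_global_ctx_buffers(s);
          }

          /* Class-dependent state: patched on every ALLOC_OBJ_CTX call. */
          if (err == 0) {
                  err = patch_state_for_class(s, class_num);
          }
          return err;
  }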

PM and preemption buffer programming is protected by the TSG
ctx_init_lock.

tsg->vm is removed. The VM reference for gr_ctx buffer mappings is
now managed through the gr_ctx or gr_subctx mappings object.
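
A minimal sketch of the new lifetime rule, assuming the types above;
nvgpu_vm_get()/nvgpu_vm_put() are the driver's VM refcount helpers,
while the create/destroy functions are illustrative:

  #include <stdlib.h>

  void nvgpu_vm_get(struct vm_gk20a *vm);
  void nvgpu_vm_put(struct vm_gk20a *vm);

  struct nvgpu_gr_ctx_mappings *mappings_create(struct vm_gk20a *vm)
  {
          struct nvgpu_gr_ctx_mappings *m = calloc(1, sizeof(*m));

          if (m == NULL) {
                  return NULL;
          }
          nvgpu_vm_get(vm);  /* pinned for the life of the mappings object */
          m->vm = vm;
          m->refs = 1;
          return m;
  }

  void mappings_destroy(struct nvgpu_gr_ctx_mappings *m)
  {
          nvgpu_vm_put(m->vm);  /* the mappings object, not the TSG, drops it */
          free(m);
  }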

For vGPU, gr_subctx and mappings objects are created to hold VM
references for the lifetime of the gr_ctx.

The functions nvgpu_tsg_subctx_alloc_gr_subctx and
nvgpu_tsg_subctx_setup_subctx_header set up the subcontext header for
the native driver.

On vGPU, only nvgpu_tsg_subctx_alloc_gr_subctx is called, to manage
the gr_ctx mapping references (see the sketch below).
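
Putting the two entry points together, the ALLOC_OBJ_CTX subcontext
setup reduces to roughly the following; the is_native flag is
illustrative:

  #include <stdbool.h>

  struct gk20a;
  int nvgpu_tsg_subctx_alloc_gr_subctx(struct gk20a *g,
                                       struct nvgpu_channel *c);
  int nvgpu_tsg_subctx_setup_subctx_header(struct gk20a *g,
                                           struct nvgpu_channel *c);

  int setup_subctx_for_channel(struct gk20a *g, struct nvgpu_channel *c,
                               bool is_native)
  {
          int err;

          /* Both native and vGPU: create the gr_subctx, which takes the
           * VM/mappings references. */
          err = nvgpu_tsg_subctx_alloc_gr_subctx(g, c);
          if (err != 0) {
                  return err;
          }

          /* Native driver only: also set up the subcontext header. */
          if (is_native) {
                  err = nvgpu_tsg_subctx_setup_subctx_header(g, c);
          }
          return err;
  }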

free_subctx is now done when unbinding a channel, taking into account
references to the subcontext held by other channels. In the native
driver case it unmaps the buffers; in the vGPU case it only releases
the VM reference.
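
The unbind path then behaves roughly like this sketch;
unmap_gr_ctx_buffers() and free_gr_subctx() are hypothetical helpers,
and the channel count models the subcontext's channel list:

  void unmap_gr_ctx_buffers(struct nvgpu_gr_subctx *s);
  void free_gr_subctx(struct nvgpu_gr_subctx *s);

  void unbind_channel_subctx(struct nvgpu_tsg_subctx *s, bool is_native)
  {
          s->nr_channels--;
          if (s->nr_channels > 0) {
                  return;  /* other channels still use this subcontext */
          }

          if (is_native) {
                  unmap_gr_ctx_buffers(s->gr_subctx);  /* native: unmap */
          }

          /* Both cases end by dropping the VM reference through the
           * mappings object (see mappings_destroy() above). */
          s->gr_subctx->mappings->refs--;
          if (s->gr_subctx->mappings->refs == 0) {
                  mappings_destroy(s->gr_subctx->mappings);
          }
          free_gr_subctx(s->gr_subctx);
          s->gr_subctx = NULL;
  }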

Note that the TEGRA_VGPU_CMD_FREE_CTX_HEADER command is no longer
sent by vGPU, as freeing the subcontext header is handled by the
native driver.

Bug 3677982

Change-Id: Ia439b251ff452a49f8514498832e24d04db86d2f
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2718760
Reviewed-by: Scott Long <scottl@nvidia.com>
Reviewed-by: Ankur Kishore <ankkishore@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>

@@ -33,6 +33,7 @@
 #include <nvgpu/gr/gr_instances.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/preempt.h>
+#include <nvgpu/tsg_subctx.h>
 
 #include "gr_priv.h"
@@ -140,22 +141,6 @@ static int nvgpu_gr_setup_validate_channel_and_class(struct gk20a *g,
 	return err;
 }
 
-static int nvgpu_gr_setup_alloc_subctx(struct gk20a *g, struct nvgpu_channel *c)
-{
-	int err = 0;
-
-	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
-		if (c->subctx == NULL) {
-			c->subctx = nvgpu_gr_subctx_alloc(g, c->vm);
-			if (c->subctx == NULL) {
-				err = -ENOMEM;
-			}
-		}
-	}
-
-	return err;
-}
-
 int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
 		u32 flags)
 {
@@ -165,6 +150,9 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
 	int err = 0;
 	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
 	struct nvgpu_gr_ctx_mappings *mappings = NULL;
+#ifdef CONFIG_NVGPU_FECS_TRACE
+	struct nvgpu_gr_subctx *gr_subctx = NULL;
+#endif
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr,
 		"GR%u: allocate object context for channel %u",
@@ -195,54 +183,53 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
 		return -EINVAL;
 	}
 
-	err = nvgpu_gr_setup_alloc_subctx(g, c);
+	nvgpu_mutex_acquire(&tsg->ctx_init_lock);
+
+	err = nvgpu_tsg_subctx_alloc_gr_subctx(g, c);
 	if (err != 0) {
-		nvgpu_err(g, "failed to allocate gr subctx buffer");
+		nvgpu_err(g, "failed to alloc gr subctx");
+		nvgpu_mutex_release(&tsg->ctx_init_lock);
 		goto out;
 	}
 
-	nvgpu_mutex_acquire(&tsg->ctx_init_lock);
+	err = nvgpu_tsg_subctx_setup_subctx_header(g, c);
+	if (err != 0) {
+		nvgpu_err(g, "failed to setup subctx header");
+		nvgpu_mutex_release(&tsg->ctx_init_lock);
+		goto out;
+	}
 
 	gr_ctx = tsg->gr_ctx;
 
-	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, c->vm);
+	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, c);
 	if (mappings == NULL) {
 		nvgpu_err(g, "fail to allocate/get ctx mappings struct");
+		nvgpu_mutex_release(&tsg->ctx_init_lock);
 		goto out;
 	}
 
-	if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx,
-			NVGPU_GR_CTX_CTX))) {
-		tsg->vm = c->vm;
-		nvgpu_vm_get(tsg->vm);
-
-		err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image,
-				gr->global_ctx_buffer, gr->gr_ctx_desc,
-				gr->config, gr_ctx, c->subctx,
-				mappings, &c->inst_block, class_num, flags,
-				c->cde, c->vpr);
-		if (err != 0) {
-			nvgpu_err(g,
-				"failed to allocate gr ctx buffer");
-			nvgpu_gr_ctx_free_mappings(g, gr_ctx);
-			nvgpu_mutex_release(&tsg->ctx_init_lock);
-			nvgpu_vm_put(tsg->vm);
-			tsg->vm = NULL;
-			goto out;
-		}
-		nvgpu_gr_ctx_set_tsgid(gr_ctx, tsg->tsgid);
-	} else {
-		/* commit gr ctx buffer */
-		nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx,
-			c->subctx, mappings);
+	err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image,
+			gr->global_ctx_buffer, gr->gr_ctx_desc,
+			gr->config, gr_ctx, c->subctx,
+			mappings, &c->inst_block, class_num, flags,
+			c->cde, c->vpr);
+	if (err != 0) {
+		nvgpu_err(g,
+			"failed to allocate gr ctx buffer");
+		nvgpu_mutex_release(&tsg->ctx_init_lock);
+		goto out;
 	}
 
+	nvgpu_gr_ctx_set_tsgid(gr_ctx, tsg->tsgid);
+
 #ifdef CONFIG_NVGPU_FECS_TRACE
 	if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
+		if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
+			gr_subctx = nvgpu_tsg_subctx_get_gr_subctx(c->subctx);
+		}
+
 		err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
-				c->subctx, gr_ctx, mappings, tsg->tgid, 0);
+				gr_subctx, gr_ctx, mappings, tsg->tgid, 0);
 		if (err != 0) {
 			nvgpu_warn(g,
 				"fail to bind channel for ctxsw trace");
@@ -274,11 +261,6 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
 	return 0;
 out:
-	if (c->subctx != NULL) {
-		nvgpu_gr_subctx_free(g, c->subctx, c->vm);
-		c->subctx = NULL;
-	}
-
 	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
 	   can be reused so no need to release them.
 	   2. golden image init and load is a one time thing so if
@@ -320,13 +302,12 @@ void nvgpu_gr_setup_free_subctx(struct nvgpu_channel *c)
 		return;
 	}
 
-	if (c->subctx != NULL) {
-		nvgpu_gr_subctx_free(c->g, c->subctx, c->vm);
-		c->subctx = NULL;
-	}
+	nvgpu_gr_subctx_free(c->g, c->subctx, c->vm, true);
 
 	nvgpu_log_fn(c->g, "done");
 }
 
-static bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode,
+bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode,
 		u32 *compute_preempt_mode,
 		struct nvgpu_gr_ctx *gr_ctx)
 {
@@ -383,9 +364,19 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
 	gr_ctx = tsg->gr_ctx;
 
+	nvgpu_mutex_acquire(&tsg->ctx_init_lock);
+
+	g->ops.tsg.disable(tsg);
+
+	err = nvgpu_preempt_channel(g, ch);
+	if (err != 0) {
+		nvgpu_err(g, "failed to preempt channel/TSG");
+		goto enable_ch;
+	}
+
 	if (nvgpu_gr_setup_validate_preemption_mode(&graphics_preempt_mode,
 		&compute_preempt_mode, gr_ctx) == false) {
-		return 0;
+		goto enable_ch;
 	}
 
 	nvgpu_log(g, gpu_dbg_gr | gpu_dbg_sched, "chid=%d tsgid=%d pid=%d "
@@ -398,13 +389,14 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
 			graphics_preempt_mode, compute_preempt_mode);
 	if (err != 0) {
 		nvgpu_err(g, "set_ctxsw_preemption_mode failed");
-		return err;
+		goto enable_ch;
 	}
 
-	mappings = nvgpu_gr_ctx_get_mappings(tsg);
+	mappings = nvgpu_gr_ctx_get_mappings(tsg, ch);
 	if (mappings == NULL) {
 		nvgpu_err(g, "failed to get gr_ctx mappings");
-		return -EINVAL;
+		err = -EINVAL;
+		goto enable_ch;
 	}
 
 #ifdef CONFIG_NVGPU_GFXP
@@ -412,29 +404,21 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
 			gr->gr_ctx_desc, gr_ctx);
 	if (err != 0) {
 		nvgpu_err(g, "fail to allocate ctx preemption buffers");
-		return err;
+		goto enable_ch;
 	}
 
 	err = nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(g,
-			gr_ctx, mappings);
+			gr_ctx, ch->subctx, mappings);
 	if (err != 0) {
 		nvgpu_err(g, "fail to map ctx preemption buffers");
-		return err;
-	}
-#endif
-
-	g->ops.tsg.disable(tsg);
-
-	err = nvgpu_preempt_channel(g, ch);
-	if (err != 0) {
-		nvgpu_err(g, "failed to preempt channel/TSG");
 		goto enable_ch;
 	}
 #endif
 
 	nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr->config, gr_ctx,
 		ch->subctx, mappings);
 
-	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
+	if (nvgpu_gr_obj_ctx_is_gfx_engine(g, ch->subctx)) {
 		nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
 		g->ops.gr.init.commit_global_cb_manager(g, gr->config, gr_ctx,
 			true);
@@ -443,9 +427,12 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
 	g->ops.tsg.enable(tsg);
+	nvgpu_mutex_release(&tsg->ctx_init_lock);
 
 	return err;
 
 enable_ch:
 	g->ops.tsg.enable(tsg);
+	nvgpu_mutex_release(&tsg->ctx_init_lock);
 	return err;
 }