gpu: nvgpu: multiple address spaces support for subcontexts

This patch introduces the following relationships among nvgpu objects
to support multiple address spaces with subcontexts. The IOCTL that
establishes each relationship is shown in parentheses.

nvgpu_tsg             1<---->n nvgpu_tsg_subctx (TSG_BIND_CHANNEL_EX)
nvgpu_tsg             1<---->n nvgpu_gr_ctx_mappings (ALLOC_OBJ_CTX)

nvgpu_tsg_subctx      1<---->1 nvgpu_gr_subctx (ALLOC_OBJ_CTX)
nvgpu_tsg_subctx      1<---->n nvgpu_channel (TSG_BIND_CHANNEL_EX)

nvgpu_gr_ctx_mappings 1<---->n nvgpu_gr_subctx (ALLOC_OBJ_CTX)
nvgpu_gr_ctx_mappings 1<---->1 vm_gk20a (ALLOC_OBJ_CTX)

On channel unbind, these objects are deleted according to their
dependencies.
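
As a rough sketch of how these objects might reference each other
(simplified stand-ins, not the real nvgpu definitions; all field names
here are hypothetical):

struct vm_gk20a;
struct nvgpu_channel;
struct nvgpu_gr_subctx;

struct nvgpu_gr_ctx_mappings {                /* one per (TSG, VM) pair */
	struct vm_gk20a *vm;                  /* 1:1, holds the VM reference */
	struct nvgpu_gr_subctx **subctxs;     /* 1:n, subcontexts on this VM */
	unsigned int num_subctxs;
};

struct nvgpu_tsg_subctx {                     /* one per VEID in a TSG */
	unsigned int veid;
	struct nvgpu_gr_subctx *gr_subctx;    /* 1:1 (ALLOC_OBJ_CTX) */
	struct nvgpu_channel **channels;      /* 1:n (TSG_BIND_CHANNEL_EX) */
	unsigned int num_channels;
};

struct nvgpu_tsg {
	struct nvgpu_tsg_subctx **subctxs;        /* 1:n */
	struct nvgpu_gr_ctx_mappings **mappings;  /* 1:n, one per VM */
	unsigned int num_subctxs, num_mappings;
};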

Without subcontexts, gr_ctx buffer mappings are maintained in
struct nvgpu_gr_ctx. With subcontexts, they are maintained in
struct nvgpu_gr_subctx.

The preemption buffer (index NVGPU_GR_CTX_PREEMPT_CTXSW) and the PM
buffer (index NVGPU_GR_CTX_PM_CTX) are mapped in all subcontexts when
they are programmed through their respective IOCTLs, as sketched below.
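
A minimal sketch of that rule, continuing the illustrative types above
(map_buffer is a hypothetical helper returning the GPU VA, 0 on failure):

unsigned long long map_buffer(struct vm_gk20a *vm, unsigned int buf_index);

/* Map one gr_ctx buffer (preempt or PM) into every VM used by the TSG's
 * subcontexts, i.e. into every gr_ctx_mappings object. */
static int map_ctx_buffer_in_all_vms(struct nvgpu_tsg *tsg,
				     unsigned int buf_index)
{
	unsigned int i;

	for (i = 0U; i < tsg->num_mappings; i++) {
		struct nvgpu_gr_ctx_mappings *m = tsg->mappings[i];

		if (map_buffer(m->vm, buf_index) == 0ULL) {
			return -1; /* real code would unwind prior mappings */
		}
	}
	return 0;
}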

Global GR context buffers are programmed only for VEID0. Based on the
channel object class, the state is patched into the patch buffer on
every ALLOC_OBJ_CTX call, instead of only for the first channel as
before.

Programming of the PM and preemption buffers is protected by the TSG
ctx_init_lock.
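
A minimal sketch of that locking rule, continuing the sketch above; a
plain pthread mutex stands in for the driver's per-TSG ctx_init_lock,
and the buffer indices are illustrative:

#include <pthread.h>

static pthread_mutex_t ctx_init_lock = PTHREAD_MUTEX_INITIALIZER;

/* Serialize preemption/PM buffer programming for the whole TSG. */
static int program_preempt_and_pm_buffers(struct nvgpu_tsg *tsg)
{
	int err;

	pthread_mutex_lock(&ctx_init_lock);
	err = map_ctx_buffer_in_all_vms(tsg, 0U /* NVGPU_GR_CTX_PREEMPT_CTXSW */);
	if (err == 0) {
		err = map_ctx_buffer_in_all_vms(tsg, 1U /* NVGPU_GR_CTX_PM_CTX */);
	}
	pthread_mutex_unlock(&ctx_init_lock);

	return err;
}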

tsg->vm is now removed. The VM reference for gr_ctx buffer mappings is
managed through the gr_ctx or gr_subctx mappings object.

For vGPU, gr_subctx and mappings objects are created to hold the VM
references for the gr_ctx lifetime.

The functions nvgpu_tsg_subctx_alloc_gr_subctx and
nvgpu_tsg_subctx_setup_subctx_header set up the subcontext header for
the native driver.

nvgpu_tsg_subctx_alloc_gr_subctx is also called from vGPU to manage the
gr_ctx mapping references.
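
The native call sequence, as exercised by the unit-test diffs below:

	err = nvgpu_tsg_subctx_alloc_gr_subctx(g, ch);
	if (err != 0) {
		goto ch_cleanup;
	}

	err = nvgpu_tsg_subctx_setup_subctx_header(g, ch); /* native driver only */
	if (err != 0) {
		goto ch_cleanup;
	}

	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, ch);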

free_subctx is now done when unbinding a channel, taking into account
references to the subcontext held by other channels. In the native
driver case it unmaps the buffers; in the vGPU case it only releases
the VM reference.
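
A hedged sketch of that unbind decision, continuing the illustrative
types above; every helper name here is hypothetical:

void remove_channel_from_subctx(struct nvgpu_tsg_subctx *s, struct nvgpu_channel *ch);
void release_vm_reference(struct nvgpu_gr_subctx *s);   /* vGPU case */
void unmap_gr_ctx_buffers(struct nvgpu_gr_subctx *s);   /* native case */
void free_gr_subctx(struct nvgpu_gr_subctx *s);

static void subctx_unbind_channel(struct nvgpu_tsg_subctx *subctx,
				  struct nvgpu_channel *ch, int is_vgpu)
{
	remove_channel_from_subctx(subctx, ch);

	if (subctx->num_channels > 0U) {
		return; /* other channels still reference this subctx */
	}

	if (is_vgpu) {
		release_vm_reference(subctx->gr_subctx);
	} else {
		unmap_gr_ctx_buffers(subctx->gr_subctx);
	}

	free_gr_subctx(subctx->gr_subctx);
	subctx->gr_subctx = NULL;
}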

Note that the TEGRA_VGPU_CMD_FREE_CTX_HEADER command is no longer
issued by vGPU, as this is now taken care of by the native driver.

Bug 3677982

Change-Id: Ia439b251ff452a49f8514498832e24d04db86d2f
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2718760
Reviewed-by: Scott Long <scottl@nvidia.com>
Reviewed-by: Ankur Kishore <ankkishore@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
Author: Sagar Kamble, 2022-05-18 17:32:18 +05:30
Committed-by: mobile promotions
Commit: f55fd5dc8c (parent: 9e13b61d4e)
37 changed files with 1963 additions and 404 deletions


@@ -654,7 +654,6 @@ int test_tsg_release(struct unit_module *m,
 	struct nvgpu_fifo *f = &g->fifo;
 	struct gpu_ops gops = g->ops;
 	struct nvgpu_tsg *tsg = NULL;
-	struct vm_gk20a vm;
 	u32 branches = 0U;
 	int ret = UNIT_FAIL;
 	u32 free_gr_ctx_mask =
@@ -706,12 +705,6 @@ int test_tsg_release(struct unit_module *m,
 	if (branches & F_TSG_RELEASE_MEM) {
 		ret = nvgpu_gr_ctx_alloc_ctx_buffers(g, gr_ctx_desc, tsg->gr_ctx);
 		unit_assert(ret == UNIT_SUCCESS, goto done);
-		tsg->vm = &vm;
-		/* prevent nvgpu_vm_remove */
-		nvgpu_ref_init(&vm.ref);
-		nvgpu_ref_get(&vm.ref);
-	} else {
-		tsg->vm = NULL;
 	}

 	if ((branches & free_gr_ctx_mask) == free_gr_ctx_mask) {
if ((branches & free_gr_ctx_mask) == free_gr_ctx_mask) {
@@ -755,7 +748,6 @@ int test_tsg_release(struct unit_module *m,
 		unit_assert(!f->tsg[tsg->tsgid].in_use, goto done);
 		unit_assert(tsg->gr_ctx == NULL, goto done);
-		unit_assert(tsg->vm == NULL, goto done);
 		unit_assert(tsg->sm_error_states == NULL, goto done);
 	}

 	ret = UNIT_SUCCESS;


@@ -28,7 +28,8 @@ NVGPU_UNIT_NAME = nvgpu-gr-ctx
 NVGPU_UNIT_SRCS = nvgpu-gr-ctx.c

 NVGPU_UNIT_INTERFACE_DIRS := \
-	$(NV_COMPONENT_DIR)/..
+	$(NV_COMPONENT_DIR)/.. \
+	$(NV_COMPONENT_DIR)/../../fifo

 include $(NV_COMPONENT_DIR)/../../Makefile.units.common.tmk


@@ -42,6 +42,8 @@
 #include "../nvgpu-gr.h"
 #include "nvgpu-gr-ctx.h"
+#include "../../fifo/nvgpu-fifo-common.h"

 #define DUMMY_SIZE 0xF0U

 static u64 nvgpu_gmmu_map_locked_stub(struct vm_gk20a *vm,
@@ -92,14 +94,24 @@ int test_gr_ctx_error_injection(struct unit_module *m,
 	u64 low_hole = SZ_4K * 16UL;
 	struct nvgpu_channel *channel = (struct nvgpu_channel *)
 				malloc(sizeof(struct nvgpu_channel));
-	struct nvgpu_tsg *tsg = (struct nvgpu_tsg *)
-				malloc(sizeof(struct nvgpu_tsg));
+	struct nvgpu_tsg *tsg;
 	u32 i;

-	if (channel == NULL || tsg == NULL) {
+	if (channel == NULL) {
 		unit_return_fail(m, "failed to allocate channel/tsg");
 	}

+	err = test_fifo_init_support(m, g, NULL);
+	if (err != 0) {
+		unit_return_fail(m, "failed to init fifo support\n");
+		return err;
+	}
+
+	tsg = nvgpu_tsg_open(g, 0);
+	if (!tsg) {
+		unit_return_fail(m, "failed to allocate tsg");
+	}
+
 	desc = nvgpu_gr_ctx_desc_alloc(g);
 	if (!desc) {
 		unit_return_fail(m, "failed to allocate memory");
@@ -147,7 +159,7 @@ int test_gr_ctx_error_injection(struct unit_module *m,
 	tsg->gr_ctx = gr_ctx;

-	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, vm);
+	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, channel);
 	if (mappings == NULL) {
 		unit_return_fail(m, "failed to allocate gr_ctx mappings");
 	}
@@ -179,7 +191,7 @@ int test_gr_ctx_error_injection(struct unit_module *m,
 	/* Inject kmem alloc failures to trigger mapping failures */
 	for (i = 0; i < 2; i++) {
 		nvgpu_posix_enable_fault_injection(kmem_fi, true, 2 * i);

-		err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx,
+		err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, NULL,
 				global_desc, mappings, false);
 		if (err == 0) {
 			unit_return_fail(m, "unexpected success");
@@ -188,8 +200,8 @@ int test_gr_ctx_error_injection(struct unit_module *m,
 	}

 	/* global ctx_desc size is not set. */
-	err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
-			mappings, false);
+	err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, NULL,
+			global_desc, mappings, false);
 	if (err == 0) {
 		unit_return_fail(m, "unexpected success");
 	}
@@ -211,8 +223,8 @@ int test_gr_ctx_error_injection(struct unit_module *m,
 	/* Fail global ctx buffer mappings */
 	for (i = 0; i < 4; i++) {
 		nvgpu_posix_enable_fault_injection(kmem_fi, true, 4 + (2 * i));

-		err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
-				mappings, false);
+		err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, NULL,
+				global_desc, mappings, false);
 		if (err == 0) {
 			unit_return_fail(m, "unexpected success");
 		}
@@ -221,8 +233,8 @@ int test_gr_ctx_error_injection(struct unit_module *m,
 	/* Successful mapping */
-	err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
-			mappings, false);
+	err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, NULL,
+			global_desc, mappings, false);
 	if (err != 0) {
 		unit_return_fail(m, "failed to map global buffers");
 	}
@@ -253,6 +265,12 @@ int test_gr_ctx_error_injection(struct unit_module *m,
 	nvgpu_gr_ctx_desc_free(g, desc);
 	nvgpu_vm_put(g->mm.bar1.vm);

+	err = test_fifo_remove_support(m, g, NULL);
+	if (err != 0) {
+		unit_return_fail(m, "failed to remove fifo support\n");
+		return err;
+	}
+
 	return UNIT_SUCCESS;
 }


@@ -37,7 +37,9 @@
 #include <nvgpu/runlist.h>
 #include <nvgpu/tsg.h>
 #include <nvgpu/class.h>
+#include <nvgpu/gr/ctx.h>
 #include <nvgpu/gr/gr_intr.h>
+#include <nvgpu/tsg_subctx.h>
 #include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
@@ -264,12 +266,45 @@ static int gr_test_intr_cache_current_ctx(struct gk20a *g,
 	return g->ops.gr.intr.stall_isr(g);
 }

+static u64 nvgpu_gmmu_map_locked_stub(struct vm_gk20a *vm,
+			u64 vaddr,
+			struct nvgpu_sgt *sgt,
+			u64 buffer_offset,
+			u64 size,
+			u32 pgsz_idx,
+			u8 kind_v,
+			u32 ctag_offset,
+			u32 flags,
+			enum gk20a_mem_rw_flag rw_flag,
+			bool clear_ctags,
+			bool sparse,
+			bool priv,
+			struct vm_gk20a_mapping_batch *batch,
+			enum nvgpu_aperture aperture)
+{
+	return 1;
+}
+
+static void nvgpu_gmmu_unmap_locked_stub(struct vm_gk20a *vm,
+			u64 vaddr,
+			u64 size,
+			u32 pgsz_idx,
+			bool va_allocated,
+			enum gk20a_mem_rw_flag rw_flag,
+			bool sparse,
+			struct vm_gk20a_mapping_batch *batch)
+{
+	return;
+}
+
 static int gr_test_intr_allocate_ch_tsg(struct unit_module *m,
 					struct gk20a *g)
 {
 	u32 tsgid = getpid();
+	struct nvgpu_gr_ctx_mappings *mappings = NULL;
 	struct nvgpu_channel *ch = NULL;
 	struct nvgpu_tsg *tsg = NULL;
+	struct vm_gk20a *vm = NULL;
 	bool sema_init, notify_init;
 	int err;
@@ -295,12 +330,46 @@ static int gr_test_intr_allocate_ch_tsg(struct unit_module *m,
 		goto ch_cleanup;
 	}

+	/* Setup VM */
+	vm = nvgpu_vm_init(g, SZ_4K, SZ_4K << 10,
+			nvgpu_safe_sub_u64(1ULL << 37, SZ_4K << 10),
+			(1ULL << 32), 0ULL,
+			false, false, false, "dummy");
+	if (!vm) {
+		unit_err(m, "failed to allocate VM");
+		goto ch_cleanup;
+	}
+
+	ch->g = g;
+	ch->vm = vm;
+
 	err = nvgpu_tsg_bind_channel(tsg, ch);
 	if (err != 0) {
 		unit_err(m, "failed tsg channel bind\n");
 		goto ch_cleanup;
 	}

+	g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked_stub;
+	g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked_stub;
+
+	err = nvgpu_tsg_subctx_alloc_gr_subctx(g, ch);
+	if (err != 0) {
+		unit_err(m, "failed to alloc gr subctx");
+		goto ch_cleanup;
+	}
+
+	err = nvgpu_tsg_subctx_setup_subctx_header(g, ch);
+	if (err != 0) {
+		unit_err(m, "failed to setup subctx header");
+		goto ch_cleanup;
+	}
+
+	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, ch);
+	if (mappings == NULL) {
+		unit_err(m, "failed to allocate gr_ctx mappings");
+		goto ch_cleanup;
+	}
+
 	err = gr_test_intr_block_ptr_as_current_ctx(m, g, ch, tsg, tsgid);
 	if (err != 0) {
 		unit_err(m, "isr failed with block_ptr as current_ctx\n");


@@ -37,6 +37,7 @@
 #include <nvgpu/gr/ctx.h>
 #include <nvgpu/gr/ctx_mappings.h>
 #include <nvgpu/gr/obj_ctx.h>
+#include <nvgpu/tsg_subctx.h>
 #include <nvgpu/posix/posix-fault-injection.h>
 #include <nvgpu/posix/dma.h>
@@ -119,7 +120,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
 	struct nvgpu_gr_global_ctx_buffer_desc *global_desc;
 	struct nvgpu_gr_ctx *gr_ctx = NULL;
 	struct nvgpu_gr_ctx_mappings *mappings = NULL;
-	struct nvgpu_gr_subctx *subctx = NULL;
+	struct nvgpu_tsg_subctx *subctx = NULL;
 	struct nvgpu_mem inst_block;
 	struct nvgpu_gr_config *config = nvgpu_gr_get_config_ptr(g);
 	struct nvgpu_posix_fault_inj *kmem_fi =
@@ -132,6 +133,8 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
 			struct nvgpu_gr_config *config);
 	struct nvgpu_tsg *tsg = (struct nvgpu_tsg *)
 				malloc(sizeof(struct nvgpu_tsg));
+	struct nvgpu_channel *channel = (struct nvgpu_channel *)
+				malloc(sizeof(struct nvgpu_channel));

 	/* Inject allocation failures and initialize obj_ctx, should fail */
 	nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
@@ -196,16 +199,31 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
 		unit_return_fail(m, "failed to allocate global buffers");
 	}

-	subctx = nvgpu_gr_subctx_alloc(g, vm);
-	if (!subctx) {
-		unit_return_fail(m, "failed to allocate subcontext");
+	channel->g = g;
+	channel->vm = vm;
+
+	err = nvgpu_tsg_subctx_bind_channel(tsg, channel);
+	if (err != 0) {
+		unit_return_fail(m, "tsg subctx bind failed");
 	}

-	mappings = nvgpu_gr_ctx_mappings_create(g, tsg, vm);
-	if (mappings == NULL) {
-		unit_return_fail(m, "failed to allocate gr_ctx mappings");
+	err = nvgpu_tsg_subctx_alloc_gr_subctx(g, channel);
+	if (err != 0) {
+		unit_return_fail(m, "failed to allocate gr_subctx");
 	}

+	err = nvgpu_tsg_subctx_setup_subctx_header(g, channel);
+	if (err != 0) {
+		unit_return_fail(m, "failed to setup subctx header");
+	}
+
+	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, channel);
+	if (mappings == NULL) {
+		unit_return_fail(m, "failed to allocate or get mappings");
+	}
+
+	subctx = channel->subctx;
+
 	/* Fail gr_ctx allocation */
 	nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
 	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
@@ -396,7 +414,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
 	}

 	/* Cleanup */
-	nvgpu_gr_subctx_free(g, subctx, vm);
+	nvgpu_tsg_subctx_unbind_channel(tsg, channel);
 	nvgpu_gr_ctx_free(g, gr_ctx, global_desc);
 	nvgpu_free_gr_ctx_struct(g, gr_ctx);
 	nvgpu_gr_ctx_desc_free(g, desc);


@@ -209,12 +209,6 @@ static int gr_test_setup_allocate_ch_tsg(struct unit_module *m,
 		goto ch_cleanup;
 	}

-	err = nvgpu_tsg_bind_channel(tsg, ch);
-	if (err != 0) {
-		unit_err(m, "failed tsg channel bind\n");
-		goto ch_cleanup;
-	}
-
 	err = gk20a_as_alloc_share(g,
 			0U, NVGPU_AS_ALLOC_UNIFIED_VA,
 			U64(SZ_4K) << U64(10),
@@ -230,6 +224,12 @@ static int gr_test_setup_allocate_ch_tsg(struct unit_module *m,
 		goto tsg_unbind;
 	}

+	err = nvgpu_tsg_bind_channel(tsg, ch);
+	if (err != 0) {
+		unit_err(m, "failed tsg channel bind\n");
+		goto ch_cleanup;
+	}
+
 	gr_setup_ch = ch;
 	gr_setup_tsg = tsg;
@@ -574,7 +574,7 @@ static int gr_setup_alloc_no_tsg_subcontext(struct unit_module *m, struct gk20a
 static void gr_setup_fake_free_obj_ctx(struct unit_module *m, struct gk20a *g)
 {
-	struct nvgpu_gr_subctx *gr_subctx = gr_setup_ch->subctx;
+	struct nvgpu_tsg_subctx *gr_subctx = gr_setup_ch->subctx;

 	/* pass NULL variable*/
 	gr_setup_ch->subctx = NULL;