gpu: nvgpu: remove circular dependency between gr and fifo

channel.c calls nvgpu_gr_flush_channel_tlb(), which creates a circular dependency between gr and fifo. Avoid this by moving the channel TLB related data to struct nvgpu_gr_intr in gr_intr_priv.h and initializing this data in gr_intr.c. Add the following new gr intr HAL and call it from channel.c: void (*flush_channel_tlb)(struct gk20a *g); JIRA NVGPU-3214 Change-Id: I2d259bf52db967273030680f50065af94a17f417 Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2109274 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-24 10:34:43 +03:00 · 2019-04-30 20:30:43 -07:00
parent 7581601f80
commit 57be9a09fd
16 changed files with 111 additions and 39 deletions
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -398,7 +398,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 		ch->usermode_submit_enabled = false;
 	}

-	nvgpu_gr_flush_channel_tlb(g);
+	g->ops.gr.intr.flush_channel_tlb(g);

 	nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem);
 	nvgpu_big_free(g, ch->gpfifo.pipe);
--- a/drivers/gpu/nvgpu/common/gr/gr.c
+++ b/drivers/gpu/nvgpu/common/gr/gr.c
@@ -25,6 +25,7 @@
 #include <nvgpu/unit.h>
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/gr/config.h>
+#include <nvgpu/gr/gr_intr.h>
 #include <nvgpu/gr/zbc.h>
 #include <nvgpu/gr/zcull.h>
 #include <nvgpu/netlist.h>
@@ -151,7 +152,7 @@ int nvgpu_gr_suspend(struct gk20a *g)
 	/* disable all exceptions */
 	g->ops.gr.intr.enable_exceptions(g, g->gr->config, false);

-	nvgpu_gr_flush_channel_tlb(g);
+	g->ops.gr.intr.flush_channel_tlb(g);

 	g->gr->initialized = false;

@@ -159,15 +160,6 @@ int nvgpu_gr_suspend(struct gk20a *g)
 	return ret;
 }

-/* invalidate channel lookup tlb */
-void nvgpu_gr_flush_channel_tlb(struct gk20a *g)
-{
-	nvgpu_spinlock_acquire(&g->gr->ch_tlb_lock);
-	(void) memset(g->gr->chid_tlb, 0,
-		sizeof(struct gr_channel_map_tlb_entry) *
-		GR_CHANNEL_MAP_TLB_SIZE);
-	nvgpu_spinlock_release(&g->gr->ch_tlb_lock);
-}

 static int gr_init_setup_hw(struct gk20a *g)
 {
@@ -279,6 +271,9 @@ static void gr_remove_support(struct gk20a *g)
 	nvgpu_gr_falcon_remove_support(g, gr->falcon);
 	gr->falcon = NULL;

+	nvgpu_gr_intr_remove_support(g, gr->intr);
+	gr->intr = NULL;
+
 	nvgpu_gr_zbc_deinit(g, gr->zbc);
 	nvgpu_gr_zcull_deinit(g, gr->zcull);
 	nvgpu_gr_obj_ctx_deinit(g, gr->golden_image);
@@ -458,7 +453,11 @@ static int gr_init_setup_sw(struct gk20a *g)
 		goto clean_up;
 	}

-	nvgpu_spinlock_init(&gr->ch_tlb_lock);
+	gr->intr = nvgpu_gr_intr_init_support(g);
+	if (gr->intr == NULL) {
+		err = -ENOMEM;
+		goto clean_up;
+	}

 	gr->remove_support = gr_remove_support;
 	gr->sw_ready = true;
--- a/drivers/gpu/nvgpu/common/gr/gr_intr.c
+++ b/drivers/gpu/nvgpu/common/gr/gr_intr.c
@@ -236,7 +236,7 @@ struct channel_gk20a *nvgpu_gr_intr_get_channel_from_ctx(struct gk20a *g,
 			u32 curr_ctx, u32 *curr_tsgid)
 {
 	struct fifo_gk20a *f = &g->fifo;
-	struct nvgpu_gr *gr = g->gr;
+	struct nvgpu_gr_intr *intr = g->gr->intr;
 	u32 chid;
 	u32 tsgid = NVGPU_INVALID_TSG_ID;
 	u32 i;
@@ -248,13 +248,13 @@ struct channel_gk20a *nvgpu_gr_intr_get_channel_from_ctx(struct gk20a *g,
 	 * unloaded. No need to check ctx_valid bit
 	 */

-	nvgpu_spinlock_acquire(&gr->ch_tlb_lock);
+	nvgpu_spinlock_acquire(&intr->ch_tlb_lock);

 	/* check cache first */
 	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
-		if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
-			chid = gr->chid_tlb[i].chid;
-			tsgid = gr->chid_tlb[i].tsgid;
+		if (intr->chid_tlb[i].curr_ctx == curr_ctx) {
+			chid = intr->chid_tlb[i].chid;
+			tsgid = intr->chid_tlb[i].tsgid;
 			ret_ch = gk20a_channel_from_id(g, chid);
 			goto unlock;
 		}
@@ -284,25 +284,25 @@ struct channel_gk20a *nvgpu_gr_intr_get_channel_from_ctx(struct gk20a *g,

 	/* add to free tlb entry */
 	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
-		if (gr->chid_tlb[i].curr_ctx == 0U) {
-			gr->chid_tlb[i].curr_ctx = curr_ctx;
-			gr->chid_tlb[i].chid = chid;
-			gr->chid_tlb[i].tsgid = tsgid;
+		if (intr->chid_tlb[i].curr_ctx == 0U) {
+			intr->chid_tlb[i].curr_ctx = curr_ctx;
+			intr->chid_tlb[i].chid = chid;
+			intr->chid_tlb[i].tsgid = tsgid;
 			goto unlock;
 		}
 	}

 	/* no free entry, flush one */
-	gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
-	gr->chid_tlb[gr->channel_tlb_flush_index].chid = chid;
-	gr->chid_tlb[gr->channel_tlb_flush_index].tsgid = tsgid;
+	intr->chid_tlb[intr->channel_tlb_flush_index].curr_ctx = curr_ctx;
+	intr->chid_tlb[intr->channel_tlb_flush_index].chid = chid;
+	intr->chid_tlb[intr->channel_tlb_flush_index].tsgid = tsgid;

-	gr->channel_tlb_flush_index =
-		(gr->channel_tlb_flush_index + 1U) &
+	intr->channel_tlb_flush_index =
+		(intr->channel_tlb_flush_index + 1U) &
 		(GR_CHANNEL_MAP_TLB_SIZE - 1U);

 unlock:
-	nvgpu_spinlock_release(&gr->ch_tlb_lock);
+	nvgpu_spinlock_release(&intr->ch_tlb_lock);
 	if (curr_tsgid != NULL) {
 		*curr_tsgid = tsgid;
 	}
@@ -864,3 +864,41 @@ int nvgpu_gr_intr_stall_isr(struct gk20a *g)

 	return 0;
 }
+
+/* Invalidate the channel lookup TLB: zero chid_tlb under ch_tlb_lock. */
+void nvgpu_gr_intr_flush_channel_tlb(struct gk20a *g)
+{
+	struct nvgpu_gr_intr *intr = g->gr->intr;
+
+	nvgpu_spinlock_acquire(&intr->ch_tlb_lock);
+	(void) memset(intr->chid_tlb, 0,
+		sizeof(struct gr_channel_map_tlb_entry) *
+		GR_CHANNEL_MAP_TLB_SIZE);
+	nvgpu_spinlock_release(&intr->ch_tlb_lock);
+}
+
+struct nvgpu_gr_intr *nvgpu_gr_intr_init_support(struct gk20a *g)
+{
+	struct nvgpu_gr_intr *intr;
+
+	nvgpu_log_fn(g, " ");
+	/* On allocation failure NULL is handed straight back to the caller. */
+	intr = nvgpu_kzalloc(g, sizeof(*intr));
+	if (intr == NULL) {
+		return intr;
+	}
+
+	nvgpu_spinlock_init(&intr->ch_tlb_lock);
+
+	return intr;
+}
+
+void nvgpu_gr_intr_remove_support(struct gk20a *g, struct nvgpu_gr_intr *intr)
+{
+	nvgpu_log_fn(g, " ");
+	/* A NULL intr is accepted and ignored; otherwise the state is freed. */
+	if (intr == NULL) {
+		return;
+	}
+	nvgpu_kfree(g, intr);
+}
--- a/drivers/gpu/nvgpu/common/gr/gr_intr_priv.h
+++ b/drivers/gpu/nvgpu/common/gr/gr_intr_priv.h
@@ -24,6 +24,7 @@
 #define NVGPU_GR_INTR_PRIV_H

 #include <nvgpu/types.h>
+#include <nvgpu/lock.h>

 struct channel_gk20a;

@@ -56,5 +57,20 @@ struct nvgpu_gr_isr_data {
 	u32 class_num;
 };

+struct gr_channel_map_tlb_entry {
+	u32 curr_ctx;	/* lookup key; 0 marks the entry as free */
+	u32 chid;	/* channel id cached for curr_ctx */
+	u32 tsgid;	/* TSG id cached for curr_ctx */
+};
+
+struct nvgpu_gr_intr {
+	/* curr_ctx -> (chid, tsgid) lookup cache; access under ch_tlb_lock. */
+#define GR_CHANNEL_MAP_TLB_SIZE		2U /* must be a power of 2 */
+	struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
+	u32 channel_tlb_flush_index;	/* next slot to evict when full */
+	struct nvgpu_spinlock ch_tlb_lock;
+
+};
+
 #endif /* NVGPU_GR_INTR_PRIV_H */

--- a/drivers/gpu/nvgpu/common/gr/gr_priv.h
+++ b/drivers/gpu/nvgpu/common/gr/gr_priv.h
@@ -35,12 +35,6 @@ struct nvgpu_gr_hwpm_map;
 struct nvgpu_gr_zcull;
 struct gk20a_cs_snapshot;

-struct gr_channel_map_tlb_entry {
-	u32 curr_ctx;
-	u32 chid;
-	u32 tsgid;
-};
-
 struct nvgpu_gr {
 	struct gk20a *g;

@@ -66,10 +60,7 @@ struct nvgpu_gr {

 	struct nvgpu_gr_falcon *falcon;

-#define GR_CHANNEL_MAP_TLB_SIZE		2U /* must of power of 2 */
-	struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
-	u32 channel_tlb_flush_index;
-	struct nvgpu_spinlock ch_tlb_lock;
+	struct nvgpu_gr_intr *intr;

 	void (*remove_support)(struct gk20a *g);
 	bool sw_ready;
--- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -27,6 +27,7 @@
 #include <nvgpu/error_notifier.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/gr/gr_intr.h>

 #include <nvgpu/vgpu/ce_vgpu.h>
 #include <nvgpu/vgpu/vm_vgpu.h>
@@ -358,6 +359,10 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 			.get_default_preemption_modes =
 				gp10b_gr_init_get_default_preemption_modes,
 		},
+
+		.intr = {
+			.flush_channel_tlb = nvgpu_gr_intr_flush_channel_tlb,
+		},
 	},
 	.class = {
 		.is_valid = gp10b_class_is_valid,
--- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c
@@ -37,6 +37,7 @@
 #include <nvgpu/gr/global_ctx.h>
 #include <nvgpu/gr/ctx.h>
 #include <nvgpu/gr/config.h>
+#include <nvgpu/gr/gr_intr.h>
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/zbc.h>
 #include <nvgpu/gr/zcull.h>
@@ -55,6 +56,7 @@

 #include "common/gr/gr_config_priv.h"
 #include "common/gr/gr_falcon_priv.h"
+#include "common/gr/gr_intr_priv.h"
 #include "common/gr/ctx_priv.h"
 #include "common/gr/zcull_priv.h"
 #include "common/gr/zbc_priv.h"
@@ -711,6 +713,14 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)

 	gr->g = g;

+	if (gr->intr == NULL) {
+		gr->intr = nvgpu_gr_intr_init_support(g);
+		if (gr->intr == NULL) {
+			err = -ENOMEM;
+			goto clean_up;
+		}
+	}
+
 	if (gr->falcon == NULL) {
 		gr->falcon = nvgpu_gr_falcon_init_support(g);
 		if (gr->falcon == NULL) {
@@ -761,7 +771,7 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
 	nvgpu_gr_ctx_set_size(gr->gr_ctx_desc, NVGPU_GR_CTX_PREEMPT_CTXSW,
 			nvgpu_gr_falcon_get_preempt_image_size(g->gr->falcon));

-	nvgpu_spinlock_init(&gr->ch_tlb_lock);
+	nvgpu_spinlock_init(&g->gr->intr->ch_tlb_lock);

 	gr->remove_support = vgpu_remove_gr_support;
 	gr->sw_ready = true;
--- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -86,6 +86,7 @@

 #include <nvgpu/gk20a.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/gr/gr_intr.h>
 #include <nvgpu/vgpu/vgpu.h>
 #include <nvgpu/error_notifier.h>

@@ -423,6 +424,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 			.handle_tpc_mpc_exception =
 					gv11b_gr_intr_handle_tpc_mpc_exception,
 			.handle_tex_exception = NULL,
+			.flush_channel_tlb = nvgpu_gr_intr_flush_channel_tlb,
 		},
 	},
 	.class = {
--- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
@@ -539,6 +539,7 @@ static const struct gpu_ops gm20b_ops = {
 			.handle_sm_exception =
 				nvgpu_gr_intr_handle_sm_exception,
 			.stall_isr = nvgpu_gr_intr_stall_isr,
+			.flush_channel_tlb = nvgpu_gr_intr_flush_channel_tlb,
 		},
 		.falcon = {
 			.read_fecs_ctxsw_mailbox =
--- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
@@ -601,6 +601,7 @@ static const struct gpu_ops gp10b_ops = {
 			.handle_sm_exception =
 				gp10b_gr_intr_handle_sm_exception,
 			.stall_isr = nvgpu_gr_intr_stall_isr,
+			.flush_channel_tlb = nvgpu_gr_intr_flush_channel_tlb,
 		},
 		.falcon = {
 			.read_fecs_ctxsw_mailbox =
--- a/drivers/gpu/nvgpu/hal/init/hal_gv100.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv100.c
@@ -749,6 +749,7 @@ static const struct gpu_ops gv100_ops = {
 			.handle_sm_exception =
 				nvgpu_gr_intr_handle_sm_exception,
 			.stall_isr = nvgpu_gr_intr_stall_isr,
+			.flush_channel_tlb = nvgpu_gr_intr_flush_channel_tlb,
 		},
 		.falcon = {
 			.handle_fecs_ecc_error =
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -723,6 +723,7 @@ static const struct gpu_ops gv11b_ops = {
 			.handle_sm_exception =
 				nvgpu_gr_intr_handle_sm_exception,
 			.stall_isr = nvgpu_gr_intr_stall_isr,
+			.flush_channel_tlb = nvgpu_gr_intr_flush_channel_tlb,
 		},
 		.falcon = {
 			.handle_fecs_ecc_error =
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -783,6 +783,7 @@ static const struct gpu_ops tu104_ops = {
 			.handle_sm_exception =
 				nvgpu_gr_intr_handle_sm_exception,
 			.stall_isr = nvgpu_gr_intr_stall_isr,
+			.flush_channel_tlb = nvgpu_gr_intr_flush_channel_tlb,
 		},
 		.falcon = {
 			.handle_fecs_ecc_error =
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -846,6 +846,7 @@ struct gpu_ops {
 				bool *post_event, struct channel_gk20a *fault_ch,
 				u32 *hww_global_esr);
 			int (*stall_isr)(struct gk20a *g);
+			void (*flush_channel_tlb)(struct gk20a *g);
 		} intr;

 		u32 (*get_ctxsw_checksum_mismatch_mailbox_val)(void);
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h
@@ -35,7 +35,7 @@ int nvgpu_gr_init_support(struct gk20a *g);
 u32 nvgpu_gr_gpc_offset(struct gk20a *g, u32 gpc);
 u32 nvgpu_gr_tpc_offset(struct gk20a *g, u32 tpc);
 int nvgpu_gr_suspend(struct gk20a *g);
-void nvgpu_gr_flush_channel_tlb(struct gk20a *g);
+
 void nvgpu_gr_wait_initialized(struct gk20a *g);
 void nvgpu_gr_init(struct gk20a *g);
 int nvgpu_gr_alloc(struct gk20a *g);
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h
@@ -30,6 +30,7 @@ struct channel_gk20a;
 struct nvgpu_gr_intr_info;
 struct nvgpu_gr_tpc_exception;
 struct nvgpu_gr_isr_data;
+struct nvgpu_gr_intr;

 int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
 					struct nvgpu_gr_isr_data *isr_data);
@@ -50,4 +51,8 @@ int nvgpu_gr_intr_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		bool *post_event, struct channel_gk20a *fault_ch,
 		u32 *hww_global_esr);
 int nvgpu_gr_intr_stall_isr(struct gk20a *g);
+
+void nvgpu_gr_intr_flush_channel_tlb(struct gk20a *g);
+struct nvgpu_gr_intr *nvgpu_gr_intr_init_support(struct gk20a *g);
+void nvgpu_gr_intr_remove_support(struct gk20a *g, struct nvgpu_gr_intr *intr);
 #endif /* NVGPU_GR_INTR_H */