gpu: nvgpu: move nvgpu_report_gr_exception to common.gr.intr

Move nvgpu_report_gr_exception from gr_gk20a.c to gr_intr.c
and rename it nvgpu_gr_intr_report_exception.

Move the local function gk20a_gr_get_channel_from_ctx to gr_intr.c
and rename it nvgpu_gr_intr_get_channel_from_ctx.

JIRA NVGPU-1891

Change-Id: I21521ad50989582d8f166a98a21ea3b1dcd3bbff
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2098229
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Commit 3d2942e412 (parent 3a764030b1), authored 2019-04-15 17:49:04 -07:00 by Vinod G,
committed by mobile promotions.
7 changed files with 142 additions and 148 deletions
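For context, here is a minimal caller sketch (not part of this patch) showing how an interrupt path could use the two relocated helpers after this change. The wrapper name and the inst/err_type/status values are illustrative placeholders; only the helper signatures and symbols visible in the diff below are assumed.

/*
 * Illustrative caller sketch only: translate the current gr context to a
 * channel/tsg, log it, and report the exception through the moved helper.
 */
static void example_report_class_error(struct gk20a *g, u32 err_type, u32 status)
{
	u32 tsgid = NVGPU_INVALID_TSG_ID;
	u32 curr_ctx = g->ops.gr.falcon.get_current_ctx(g);
	struct channel_gk20a *ch =
		nvgpu_gr_intr_get_channel_from_ctx(g, curr_ctx, &tsgid);

	nvgpu_err(g, "class error on chid %u, tsgid %u",
		ch != NULL ? ch->chid : FIFO_INVAL_CHANNEL_ID, tsgid);

	/* report to the PGRAPH error module against gr instance 0 */
	nvgpu_gr_intr_report_exception(g, 0U, err_type, status);

	if (ch != NULL) {
		/* channels returned by the translation helper must be released */
		gk20a_channel_put(ch);
	}
}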


@@ -24,6 +24,7 @@
#include <nvgpu/io.h>
#include <nvgpu/channel.h>
#include <nvgpu/regops.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_intr.h>
@@ -104,6 +105,128 @@ static int gr_intr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
	return ret;
}

/* Used by sw interrupt thread to translate current ctx to chid.
 * Also used by regops to translate current ctx to chid and tsgid.
 * For performance, we don't want to go through 128 channels every time.
 * curr_ctx should be the value read from gr falcon get_current_ctx op
 * A small tlb is used here to cache translation.
 *
 * Returned channel must be freed with gk20a_channel_put() */
struct channel_gk20a *nvgpu_gr_intr_get_channel_from_ctx(struct gk20a *g,
			u32 curr_ctx, u32 *curr_tsgid)
{
	struct fifo_gk20a *f = &g->fifo;
	struct gr_gk20a *gr = &g->gr;
	u32 chid;
	u32 tsgid = NVGPU_INVALID_TSG_ID;
	u32 i;
	struct channel_gk20a *ret_ch = NULL;

	/* when contexts are unloaded from GR, the valid bit is reset
	 * but the instance pointer information remains intact.
	 * This might be called from gr_isr where contexts might be
	 * unloaded. No need to check ctx_valid bit
	 */
	nvgpu_spinlock_acquire(&gr->ch_tlb_lock);

	/* check cache first */
	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
		if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
			chid = gr->chid_tlb[i].chid;
			tsgid = gr->chid_tlb[i].tsgid;
			ret_ch = gk20a_channel_from_id(g, chid);
			goto unlock;
		}
	}

	/* slow path */
	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);

		if (ch == NULL) {
			continue;
		}

		if (nvgpu_inst_block_ptr(g, &ch->inst_block) ==
				g->ops.gr.falcon.get_ctx_ptr(curr_ctx)) {
			tsgid = ch->tsgid;
			/* found it */
			ret_ch = ch;
			break;
		}
		gk20a_channel_put(ch);
	}

	if (ret_ch == NULL) {
		goto unlock;
	}

	/* add to free tlb entry */
	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
		if (gr->chid_tlb[i].curr_ctx == 0U) {
			gr->chid_tlb[i].curr_ctx = curr_ctx;
			gr->chid_tlb[i].chid = chid;
			gr->chid_tlb[i].tsgid = tsgid;
			goto unlock;
		}
	}

	/* no free entry, flush one */
	gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
	gr->chid_tlb[gr->channel_tlb_flush_index].chid = chid;
	gr->chid_tlb[gr->channel_tlb_flush_index].tsgid = tsgid;

	gr->channel_tlb_flush_index =
		(gr->channel_tlb_flush_index + 1U) &
		(GR_CHANNEL_MAP_TLB_SIZE - 1U);

unlock:
	nvgpu_spinlock_release(&gr->ch_tlb_lock);
	if (curr_tsgid != NULL) {
		*curr_tsgid = tsgid;
	}
	return ret_ch;
}

void nvgpu_gr_intr_report_exception(struct gk20a *g, u32 inst,
		u32 err_type, u32 status)
{
	int ret = 0;
	struct channel_gk20a *ch;
	struct gr_exception_info err_info;
	struct gr_err_info info;
	u32 tsgid, chid, curr_ctx;

	if (g->ops.gr.err_ops.report_gr_err == NULL) {
		return;
	}

	tsgid = NVGPU_INVALID_TSG_ID;
	curr_ctx = g->ops.gr.falcon.get_current_ctx(g);
	ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_ctx, &tsgid);
	chid = ch != NULL ? ch->chid : FIFO_INVAL_CHANNEL_ID;
	if (ch != NULL) {
		gk20a_channel_put(ch);
	}

	(void) memset(&err_info, 0, sizeof(err_info));
	(void) memset(&info, 0, sizeof(info));
	err_info.curr_ctx = curr_ctx;
	err_info.chid = chid;
	err_info.tsgid = tsgid;
	err_info.status = status;
	info.exception_info = &err_info;
	ret = g->ops.gr.err_ops.report_gr_err(g,
			NVGPU_ERR_MODULE_PGRAPH, inst, err_type,
			&info);
	if (ret != 0) {
		nvgpu_err(g, "Failed to report PGRAPH exception: "
				"inst=%u, err_type=%u, status=%u",
				inst, err_type, status);
	}
}

int nvgpu_gr_intr_handle_gpc_exception(struct gk20a *g, bool *post_event,
		struct nvgpu_gr_config *gr_config, struct channel_gk20a *fault_ch,
		u32 *hww_global_esr)