gpu: nvgpu: Add support for multiple GPC/TPCs

Add support for multiple GPCs/TPCs to the GPC/TPC exception handling
code, which previously serviced only GPC0/TPC0.
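
The handlers address each unit's registers by rebasing the broadcast
gpc0/tpc0 register offsets with the per-GPC and per-TPC strides from the
generated project headers. A minimal sketch of that addressing scheme
(the helper name gpc_tpc_offset is illustrative, not part of this patch):

	/* Rebase a gpc0_tpc0 register address to a specific GPC/TPC pair;
	 * the strides come from the generated proj_*_v() accessors. */
	static inline u32 gpc_tpc_offset(u32 gpc, u32 tpc)
	{
		return proj_gpc_stride_v() * gpc +
		       proj_tpc_in_gpc_stride_v() * tpc;
	}

	/* e.g. read the SM HWW global ESR of (gpc, tpc): */
	u32 global_esr = gk20a_readl(g,
			gr_gpc0_tpc0_sm_hww_global_esr_r() +
			gpc_tpc_offset(gpc, tpc));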

Change-Id: Ifb4b53a016e90cb54c4d985a9e17760f87c6046f
Signed-off-by: Mayank Kaushik <mkaushik@nvidia.com>
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/411660
Reviewed-by: Automatic_Commit_Validation_User
commit 04efcaf97e (parent 87373abc95)
Author:    Mayank Kaushik, 2014-05-19 14:00:13 -07:00
Committer: Dan Willemsen
2 changed files with 150 additions and 81 deletions

drivers/gpu/nvgpu/gk20a/gr_gk20a.c

@@ -4127,13 +4127,16 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
 static void gk20a_gr_enable_gpc_exceptions(struct gk20a *g)
 {
 	/* enable tpc exception forwarding */
-	gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(),
-		gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f());
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc_mask;
 
 	/* enable gpc exception forwarding */
-	gk20a_writel(g, gr_gpc0_gpccs_gpc_exception_en_r(),
-		gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f());
+	gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
+		gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
+
+	tpc_mask =
+		gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1);
+	gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask);
 }
@@ -4316,7 +4319,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 	g->ops.gr.enable_hww_exceptions(g);
 	g->ops.gr.set_hww_esr_report_mask(g);
 
-	/* enable per GPC exceptions */
+	/* enable TPC exceptions per GPC */
 	gk20a_gr_enable_gpc_exceptions(g);
 
 	/* TBD: ECC for L1/SM */
@@ -5262,26 +5265,35 @@ unlock:
 	return chid;
 }
 
-static int gk20a_gr_lock_down_sm(struct gk20a *g, u32 global_esr_mask)
+static int gk20a_gr_lock_down_sm(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 global_esr_mask)
 {
 	unsigned long end_jiffies = jiffies +
 		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
+	u32 offset =
+		proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc;
 	u32 dbgr_control0;
 
-	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locking down SM");
+	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GPC%d TPC%d: locking down SM", gpc, tpc);
 
 	/* assert stop trigger */
-	dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
+	dbgr_control0 =
+		gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
 	dbgr_control0 |= gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
-	gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), dbgr_control0);
+	gk20a_writel(g,
+		gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
 
 	/* wait for the sm to lock down */
 	do {
-		u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
-		u32 warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r());
-		u32 dbgr_status0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_status0_r());
+		u32 global_esr = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+		u32 warp_esr = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+		u32 dbgr_status0 = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
 		bool locked_down =
 			(gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
 			 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
@@ -5291,11 +5303,14 @@ static int gk20a_gr_lock_down_sm(struct gk20a *g, u32 global_esr_mask)
 			((global_esr & ~global_esr_mask) != 0);
 
 		if (locked_down || !error_pending) {
-			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locked down SM");
+			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+					"GPC%d TPC%d: locked down SM", gpc, tpc);
 
 			/* de-assert stop trigger */
 			dbgr_control0 &= ~gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
-			gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), dbgr_control0);
+			gk20a_writel(g,
+				gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
+				dbgr_control0);
 
 			return 0;
 		}
@@ -5303,8 +5318,9 @@ static int gk20a_gr_lock_down_sm(struct gk20a *g, u32 global_esr_mask)
 		/* if an mmu fault is pending and mmu debug mode is not
 		 * enabled, the sm will never lock down. */
 		if (!mmu_debug_mode_enabled && gk20a_fifo_mmu_fault_pending(g)) {
-			gk20a_err(dev_from_gk20a(g), "mmu fault pending, sm will"
-				" never lock down!");
+			gk20a_err(dev_from_gk20a(g),
+					"GPC%d TPC%d: mmu fault pending,"
+					" sm will never lock down!", gpc, tpc);
 			return -EFAULT;
 		}
@@ -5314,7 +5330,9 @@ static int gk20a_gr_lock_down_sm(struct gk20a *g, u32 global_esr_mask)
 	} while (time_before(jiffies, end_jiffies)
 			|| !tegra_platform_is_silicon());
 
-	gk20a_err(dev_from_gk20a(g), "timed out while trying to lock down SM");
+	gk20a_err(dev_from_gk20a(g),
+		"GPC%d TPC%d: timed out while trying to lock down SM",
+		gpc, tpc);
 
 	return -EAGAIN;
 }
@@ -5323,7 +5341,9 @@ bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
 {
 	u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
 
-	/* check if an sm debugger is attached */
+	/* check if an sm debugger is attached.
+	 * assumption: all SMs will have debug mode enabled/disabled
+	 * uniformly. */
 	if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) ==
 			gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v())
 		return true;
@@ -5331,12 +5351,17 @@ bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
 	return false;
 }
 
-static void gk20a_gr_clear_sm_hww(struct gk20a *g, u32 global_esr)
+static void gk20a_gr_clear_sm_hww(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 global_esr)
 {
-	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r(), global_esr);
+	u32 offset = proj_gpc_stride_v() * gpc +
+		     proj_tpc_in_gpc_stride_v() * tpc;
+
+	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+			global_esr);
 
 	/* clear the warp hww */
-	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r(),
+	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
 		gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f());
 }
@@ -5346,11 +5371,14 @@ channel_from_hw_chid(struct gk20a *g, u32 hw_chid)
 	return g->fifo.channel+hw_chid;
 }
 
-static int gk20a_gr_handle_sm_exception(struct gk20a *g,
-		struct gr_isr_data *isr_data)
+static int gk20a_gr_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
+		bool *post_event)
 {
 	int ret = 0;
 	bool do_warp_sync = false;
+	u32 offset = proj_gpc_stride_v() * gpc +
+		     proj_tpc_in_gpc_stride_v() * tpc;
+
 	/* these three interrupts don't require locking down the SM. They can
 	 * be handled by usermode clients as they aren't fatal. Additionally,
 	 * usermode clients may wish to allow some warps to execute while others
@@ -5361,75 +5389,112 @@ static int gk20a_gr_handle_sm_exception(struct gk20a *g,
 		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
 	u32 global_esr, warp_esr;
 	bool sm_debugger_attached = gk20a_gr_sm_debugger_attached(g);
-	struct channel_gk20a *fault_ch;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 
-	global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
-	warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r());
+	global_esr = gk20a_readl(g,
+			gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
 
 	/* if an sm debugger is attached, disable forwarding of tpc exceptions.
 	 * the debugger will reenable exceptions after servicing them. */
 	if (sm_debugger_attached) {
-		u32 tpc_exception_en = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r());
+		u32 tpc_exception_en = gk20a_readl(g,
+				gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
+				offset);
 		tpc_exception_en &= ~gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f();
-		gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(), tpc_exception_en);
+		gk20a_writel(g,
+			     gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + offset,
+			     tpc_exception_en);
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM debugger attached");
 	}
 
 	/* if a debugger is present and an error has occurred, do a warp sync */
-	if (sm_debugger_attached && ((warp_esr != 0) || ((global_esr & ~global_mask) != 0))) {
+	if (sm_debugger_attached &&
+	    ((warp_esr != 0) || ((global_esr & ~global_mask) != 0))) {
 		gk20a_dbg(gpu_dbg_intr, "warp sync needed");
 		do_warp_sync = true;
 	}
 
 	if (do_warp_sync) {
-		ret = gk20a_gr_lock_down_sm(g, global_mask);
+		ret = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
 		if (ret) {
 			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
 			return ret;
 		}
 	}
 
-	/* finally, signal any client waiting on an event */
-	fault_ch = channel_from_hw_chid(g, isr_data->chid);
-	if (fault_ch)
-		gk20a_dbg_gpu_post_events(fault_ch);
+	*post_event |= true;
 
 	return ret;
 }
 
-static int gk20a_gr_handle_tpc_exception(struct gk20a *g,
-		struct gr_isr_data *isr_data)
+static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
+		bool *post_event)
 {
 	int ret = 0;
-	u32 tpc_exception = gk20a_readl(g, gr_gpcs_tpcs_tpccs_tpc_exception_r());
+	u32 offset = proj_gpc_stride_v() * gpc +
+		     proj_tpc_in_gpc_stride_v() * tpc;
+	u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r()
+			+ offset);
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
 
-	/* check if an sm exception is pending */
-	if (gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(tpc_exception) ==
-			gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v()) {
-		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM exception pending");
-		ret = gk20a_gr_handle_sm_exception(g, isr_data);
+	/* check if an sm exception is pending */
+	if (gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(tpc_exception) ==
+			gr_gpc0_tpc0_tpccs_tpc_exception_sm_pending_v()) {
+		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+				"GPC%d TPC%d: SM exception pending", gpc, tpc);
+		ret = gk20a_gr_handle_sm_exception(g, gpc, tpc, post_event);
 	}
 
 	return ret;
 }
 
-static int gk20a_gr_handle_gpc_exception(struct gk20a *g,
-		struct gr_isr_data *isr_data)
+static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event)
 {
 	int ret = 0;
-	u32 gpc_exception = gk20a_readl(g, gr_gpcs_gpccs_gpc_exception_r());
+	u32 gpc_offset, tpc_offset, gpc, tpc;
+	struct gr_gk20a *gr = &g->gr;
+	u32 exception1 = gk20a_readl(g, gr_exception1_r());
+	u32 gpc_exception, global_esr;
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
 
-	/* check if tpc 0 has an exception */
-	if (gr_gpcs_gpccs_gpc_exception_tpc_v(gpc_exception) ==
-			gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v()) {
-		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "TPC exception pending");
-		ret = gk20a_gr_handle_tpc_exception(g, isr_data);
+	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+		if ((exception1 & (1 << gpc)) == 0)
+			continue;
+
+		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+				"GPC%d exception pending", gpc);
+
+		gpc_offset = proj_gpc_stride_v() * gpc;
+
+		gpc_exception = gk20a_readl(g, gr_gpc0_gpccs_gpc_exception_r()
+				+ gpc_offset);
+
+		/* check if any tpc has an exception */
+		for (tpc = 0; tpc < gr->tpc_count; tpc++) {
+			if ((gr_gpc0_gpccs_gpc_exception_tpc_v(gpc_exception) &
+				(1 << tpc)) == 0)
+				continue;
+
+			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+				"GPC%d: TPC%d exception pending", gpc, tpc);
+
+			tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+
+			global_esr = gk20a_readl(g,
+					gr_gpc0_tpc0_sm_hww_global_esr_r() +
+					gpc_offset + tpc_offset);
+
+			ret = gk20a_gr_handle_tpc_exception(g, gpc, tpc,
+					post_event);
+
+			/* clear the hwws, also causes tpc and gpc
+			 * exceptions to be cleared */
+			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
+		}
 	}
 
 	return ret;
@@ -5569,8 +5634,7 @@ int gk20a_gr_isr(struct gk20a *g)
 	/* check if a gpc exception has occurred */
 	if (exception & gr_exception_gpc_m() && need_reset == 0) {
-		u32 exception1 = gk20a_readl(g, gr_exception1_r());
-		u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
+		struct channel_gk20a *fault_ch;
 
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "GPC exception pending");
@@ -5580,12 +5644,17 @@ int gk20a_gr_isr(struct gk20a *g)
"SM debugger not attached, clearing interrupt");
need_reset |= -EFAULT;
} else {
/* check if gpc 0 has an exception */
if (exception1 & gr_exception1_gpc_0_pending_f())
need_reset |= gk20a_gr_handle_gpc_exception(g, &isr_data);
/* clear the hwws, also causes tpc and gpc
* exceptions to be cleared */
gk20a_gr_clear_sm_hww(g, global_esr);
bool post_event = false;
/* check if any gpc has an exception */
need_reset |= gk20a_gr_handle_gpc_exception(g,
&post_event);
/* signal clients waiting on an event */
fault_ch = channel_from_hw_chid(g,
isr_data.chid);
if (post_event && fault_ch)
gk20a_dbg_gpu_post_events(fault_ch);
}
if (need_reset)

drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -2790,6 +2790,14 @@ static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complet
 {
 	return 0x40;
 }
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_r(void)
+{
+	return 0x00419d0c;
+}
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f(void)
+{
+	return 0x2;
+}
 static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
 {
 	return 0x0050450c;
@@ -2798,43 +2806,35 @@ static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
 {
 	return 0x2;
 }
-static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_disabled_f(void)
+static inline u32 gr_gpcs_gpccs_gpc_exception_en_r(void)
 {
-	return 0x0;
+	return 0x0041ac94;
 }
-static inline u32 gr_gpc0_gpccs_gpc_exception_en_r(void)
+static inline u32 gr_gpcs_gpccs_gpc_exception_en_tpc_f(u32 v)
 {
-	return 0x00502c94;
+	return (v & 0xff) << 16;
 }
-static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f(void)
+static inline u32 gr_gpc0_gpccs_gpc_exception_r(void)
 {
-	return 0x10000;
+	return 0x00502c90;
 }
-static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_disabled_f(void)
-{
-	return 0x0;
-}
-static inline u32 gr_gpcs_gpccs_gpc_exception_r(void)
-{
-	return 0x0041ac90;
-}
-static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_v(u32 r)
+static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_v(u32 r)
 {
 	return (r >> 16) & 0xff;
 }
-static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v(void)
+static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_0_pending_v(void)
 {
 	return 0x00000001;
 }
-static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_r(void)
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void)
 {
-	return 0x00419d08;
+	return 0x00504508;
 }
-static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(u32 r)
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(u32 r)
 {
 	return (r >> 1) & 0x1;
 }
-static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v(void)
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_pending_v(void)
 {
 	return 0x00000001;
 }
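
For reference, the regenerated per-unit (gr_gpc0_*) accessors above are
meant to be combined with the same stride-based offsets as in the handler
code; a usage sketch (illustrative only, not part of the diff):

	u32 offset = proj_gpc_stride_v() * gpc +
		     proj_tpc_in_gpc_stride_v() * tpc;
	u32 tpc_exception = gk20a_readl(g,
			gr_gpc0_tpc0_tpccs_tpc_exception_r() + offset);
	bool sm_pending =
		gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(tpc_exception) ==
		gr_gpc0_tpc0_tpccs_tpc_exception_sm_pending_v();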