gpu: nvgpu: Split L2 interrupt handling to MC and L2

An L2 interrupt is processed by first reading from MC to determine which L2 triggered the interrupt, and then calling a function per L2 slice to get the details. Move the outer loop to the MC unit, and the inner loop and the L2 accesses to the LTC unit.

JIRA NVGPU-954

Change-Id: I69b7bb82e4574b0519cdcd73b94d7d3e3fa6ef9e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1851328
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-23 09:57:08 +03:00 · 2018-09-28 15:04:15 -07:00
parent bccb1690c5
commit bc379d5eed
21 changed files with 90 additions and 75 deletions
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c
@@ -32,7 +32,6 @@
 #include <nvgpu/utils.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/hw/gm20b/hw_mc_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_top_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h>
@@ -238,30 +237,29 @@ void gm20b_ltc_init_fs_state(struct gk20a *g)
 	gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg);
 }
-void gm20b_ltc_isr(struct gk20a *g)
+void gm20b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice)
 {
-	u32 mc_intr, ltc_intr;
+	u32 ltc_intr;
 	unsigned int ltc, slice;
 	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
 	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
-	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
+	ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
-	nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr);
+			   ltc_stride * ltc +
-	for (ltc = 0; ltc < g->ltc_count; ltc++) {
+			   lts_stride * slice);
-		if ((mc_intr & 1U << ltc) == 0) {
+	nvgpu_err(g, "ltc%d, slice %d: %08x",
-			continue;
+		  ltc, slice, ltc_intr);
-		}
+	gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
-		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
+			   ltc_stride * ltc +
-			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
+			   lts_stride * slice,
-					   ltc_stride * ltc +
+		     ltc_intr);
-					   lts_stride * slice);
+}
-			nvgpu_err(g, "ltc%d, slice %d: %08x",
+
-				  ltc, slice, ltc_intr);
+void gm20b_ltc_isr(struct gk20a *g, unsigned int ltc)
-			gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
+{
-					   ltc_stride * ltc +
+	unsigned int slice;
-					   lts_stride * slice,
+
-				     ltc_intr);
+	for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
-		}
+		gm20b_ltc_lts_isr(g, ltc, slice);
 	}
 }
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h
@@ -46,7 +46,8 @@ void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled);
 void gm20b_ltc_init_fs_state(struct gk20a *g);
 int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 		       u32 min, u32 max);
-void gm20b_ltc_isr(struct gk20a *g);
+void gm20b_ltc_isr(struct gk20a *g, unsigned int ltc);
 void gm20b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice);
 u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base);
 void gm20b_flush_ltc(struct gk20a *g);
 int gm20b_ltc_alloc_phys_cbc(struct gk20a *g,
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c
@@ -31,7 +31,6 @@
 #include <nvgpu/timers.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/hw/gp10b/hw_mc_gp10b.h>
 #include <nvgpu/hw/gp10b/hw_ltc_gp10b.h>
 #include "ltc_gm20b.h"
@@ -232,8 +231,7 @@ out:
 	return err;
 }
-void gp10b_ltc_lts_isr(struct gk20a *g,
+void gp10b_ltc_lts_isr(struct gk20a *g,	unsigned int ltc, unsigned int slice)
 		unsigned int ltc, unsigned int slice)
 {
 	u32 offset;
 	u32 ltc_intr;
@@ -289,20 +287,12 @@ void gp10b_ltc_lts_isr(struct gk20a *g,
 		ltc_intr);
 }
-void gp10b_ltc_isr(struct gk20a *g)
+void gp10b_ltc_isr(struct gk20a *g, unsigned int ltc)
 {
-	u32 mc_intr;
+	unsigned int slice;
 	unsigned int ltc, slice;
-	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
+	for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
-	nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr);
+		gp10b_ltc_lts_isr(g, ltc, slice);
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		if ((mc_intr & 1U << ltc) == 0) {
 			continue;
 		}
 		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
 			gp10b_ltc_lts_isr(g, ltc, slice);
 		}
 	}
 }
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h
@@ -25,14 +25,12 @@
 struct gk20a;
 struct gpu_ops;
 void gp10b_ltc_isr(struct gk20a *g);
 int gp10b_determine_L2_size_bytes(struct gk20a *g);
 int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr);
 void gp10b_ltc_init_fs_state(struct gk20a *g);
 int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 		       u32 min, u32 max);
 void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled);
-void gp10b_ltc_lts_isr(struct gk20a *g,
+void gp10b_ltc_isr(struct gk20a *g, unsigned int ltc);
-		unsigned int ltc, unsigned int slice);
+void gp10b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice);
 #endif
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
@@ -106,8 +106,7 @@ void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable)
 	gk20a_writel(g, ltc_ltcs_ltss_intr_r(), val);
 }
-void gv11b_ltc_lts_isr(struct gk20a *g,
+void gv11b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice)
 		unsigned int ltc, unsigned int slice)
 {
 	u32 offset;
 	u32 ltc_intr3;
@@ -204,19 +203,11 @@ void gv11b_ltc_lts_isr(struct gk20a *g,
 	gp10b_ltc_lts_isr(g, ltc, slice);
 }
-void gv11b_ltc_isr(struct gk20a *g)
+void gv11b_ltc_isr(struct gk20a *g, unsigned int ltc)
 {
-	u32 mc_intr;
+	unsigned int slice;
 	unsigned int ltc, slice;
-	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
+	for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
-	for (ltc = 0; ltc < g->ltc_count; ltc++) {
+		gv11b_ltc_lts_isr(g, ltc, slice);
 		if ((mc_intr & 1U << ltc) == 0) {
 			continue;
 		}
 		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
 			gv11b_ltc_lts_isr(g, ltc, slice);
 		}
 	}
 }
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h
@@ -29,8 +29,7 @@ void gv11b_ltc_set_zbc_stencil_entry(struct gk20a *g,
 					  u32 index);
 void gv11b_ltc_init_fs_state(struct gk20a *g);
 void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable);
-void gv11b_ltc_isr(struct gk20a *g);
+void gv11b_ltc_isr(struct gk20a *g, unsigned int ltc);
-void gv11b_ltc_lts_isr(struct gk20a *g,
+void gv11b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice);
 		unsigned int ltc, unsigned int slice);
 #endif
--- a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c
@@ -229,15 +229,3 @@ out:
 	nvgpu_mutex_release(&g->mm.l2_op_lock);
 	return err;
 }
 void tu104_ltc_isr(struct gk20a *g)
 {
 	unsigned int ltc, slice;
 	/* Go through all the LTCs explicitly */
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
 			gv11b_ltc_lts_isr(g, ltc, slice);
 		}
 	}
 }
--- a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.h
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.h
@@ -29,7 +29,6 @@ enum gk20a_cbc_op;
 struct gk20a;
 struct gr_gk20a;
 void tu104_ltc_isr(struct gk20a *g);
 u64 ltc_tu104_get_cbc_base_divisor(struct gk20a *g);
 void ltc_tu104_init_fs_state(struct gk20a *g);
 int ltc_tu104_init_comptags(struct gk20a *g, struct gr_gk20a *gr);
--- a/drivers/gpu/nvgpu/common/mc/mc_gm20b.c
+++ b/drivers/gpu/nvgpu/common/mc/mc_gm20b.c
@@ -75,7 +75,7 @@ void gm20b_mc_isr_stall(struct gk20a *g)
 		g->ops.priv_ring.isr(g);
 	}
 	if ((mc_intr_0 & mc_intr_ltc_pending_f()) != 0U) {
-		g->ops.ltc.isr(g);
+		g->ops.mc.ltc_isr(g);
 	}
 	if ((mc_intr_0 & mc_intr_pbus_pending_f()) != 0U) {
 		g->ops.bus.isr(g);
@@ -341,3 +341,18 @@ void gm20b_mc_fb_reset(struct gk20a *g)
 		| mc_elpg_enable_hub_enabled_f();
 	gk20a_writel(g, mc_elpg_enable_r(), val);
 }
 void gm20b_mc_ltc_isr(struct gk20a *g)
 {
 	u32 mc_intr;
 	unsigned int ltc;
 	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
 	nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr);
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		if ((mc_intr & 1U << ltc) == 0) {
 			continue;
 		}
 		g->ops.ltc.isr(g, ltc);
 	}
 }
--- a/drivers/gpu/nvgpu/common/mc/mc_gm20b.h
+++ b/drivers/gpu/nvgpu/common/mc/mc_gm20b.h
@@ -50,5 +50,6 @@ void gm20b_mc_handle_intr_nonstall(struct gk20a *g, u32 ops);
 u32 gm20b_mc_reset_mask(struct gk20a *g, enum nvgpu_unit unit);
 bool gm20b_mc_is_enabled(struct gk20a *g, enum nvgpu_unit unit);
 void gm20b_mc_fb_reset(struct gk20a *g);
 void gm20b_mc_ltc_isr(struct gk20a *g);
 #endif /* NVGPU_MC_GM20B_H */
--- a/drivers/gpu/nvgpu/common/mc/mc_gp10b.c
+++ b/drivers/gpu/nvgpu/common/mc/mc_gp10b.c
@@ -135,7 +135,7 @@ void mc_gp10b_isr_stall(struct gk20a *g)
 		g->ops.priv_ring.isr(g);
 	}
 	if ((mc_intr_0 & mc_intr_ltc_pending_f()) != 0U) {
-		g->ops.ltc.isr(g);
+		g->ops.mc.ltc_isr(g);
 	}
 	if ((mc_intr_0 & mc_intr_pbus_pending_f()) != 0U) {
 		g->ops.bus.isr(g);
@@ -222,3 +222,18 @@ void mc_gp10b_log_pending_intrs(struct gk20a *g)
 	}
 }
 void mc_gp10b_ltc_isr(struct gk20a *g)
 {
 	u32 mc_intr;
 	unsigned int ltc;
 	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
 	nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr);
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		if ((mc_intr & 1U << ltc) == 0) {
 			continue;
 		}
 		g->ops.ltc.isr(g, ltc);
 	}
 }
--- a/drivers/gpu/nvgpu/common/mc/mc_gp10b.h
+++ b/drivers/gpu/nvgpu/common/mc/mc_gp10b.h
@@ -43,5 +43,6 @@ void mc_gp10b_intr_stall_resume(struct gk20a *g);
 u32 mc_gp10b_intr_nonstall(struct gk20a *g);
 void mc_gp10b_intr_nonstall_pause(struct gk20a *g);
 void mc_gp10b_intr_nonstall_resume(struct gk20a *g);
 void mc_gp10b_ltc_isr(struct gk20a *g);
 #endif
--- a/drivers/gpu/nvgpu/common/mc/mc_tu104.c
+++ b/drivers/gpu/nvgpu/common/mc/mc_tu104.c
@@ -410,3 +410,14 @@ void mc_tu104_fbpa_isr(struct gk20a *g)
 		g->ops.fb.handle_fbpa_intr(g, i);
 	}
 }
 void mc_tu104_ltc_isr(struct gk20a *g)
 {
 	unsigned int ltc;
 	/* Go through all the LTCs explicitly */
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		g->ops.ltc.isr(g, ltc);
 	}
 }
--- a/drivers/gpu/nvgpu/common/mc/mc_tu104.h
+++ b/drivers/gpu/nvgpu/common/mc/mc_tu104.h
@@ -61,5 +61,6 @@ u32 intr_tu104_isr_nonstall(struct gk20a *g);
 bool intr_tu104_is_intr_hub_pending(struct gk20a *g, u32 mc_intr_0);
 void intr_tu104_log_pending_intrs(struct gk20a *g);
 void mc_tu104_fbpa_isr(struct gk20a *g);
 void mc_tu104_ltc_isr(struct gk20a *g);
 #endif /* NVGPU_MC_TU104_H */
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -599,6 +599,7 @@ static const struct gpu_ops gm20b_ops = {
 		.reset_mask = gm20b_mc_reset_mask,
 		.is_enabled = gm20b_mc_is_enabled,
 		.fb_reset = gm20b_mc_fb_reset,
 		.ltc_isr = gm20b_mc_ltc_isr,
 	},
 	.debug = {
 		.show_dump = gk20a_debug_show_dump,
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -727,6 +727,7 @@ static const struct gpu_ops gp106_ops = {
 		.reset_mask = gm20b_mc_reset_mask,
 		.is_enabled = gm20b_mc_is_enabled,
 		.fb_reset = NULL,
 		.ltc_isr = mc_gp10b_ltc_isr,
 	},
 	.debug = {
 		.show_dump = gk20a_debug_show_dump,
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -661,6 +661,7 @@ static const struct gpu_ops gp10b_ops = {
 		.reset_mask = gm20b_mc_reset_mask,
 		.is_enabled = gm20b_mc_is_enabled,
 		.fb_reset = gm20b_mc_fb_reset,
 		.ltc_isr = mc_gp10b_ltc_isr,
 	},
 	.debug = {
 		.show_dump = gk20a_debug_show_dump,
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -844,6 +844,7 @@ static const struct gpu_ops gv100_ops = {
 		.reset_mask = gv100_mc_reset_mask,
 		.is_enabled = gm20b_mc_is_enabled,
 		.fb_reset = NULL,
 		.ltc_isr = mc_gp10b_ltc_isr,
 	},
 	.debug = {
 		.show_dump = gk20a_debug_show_dump,
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -785,6 +785,7 @@ static const struct gpu_ops gv11b_ops = {
 		.reset_mask = gm20b_mc_reset_mask,
 		.is_enabled = gm20b_mc_is_enabled,
 		.fb_reset = NULL,
 		.ltc_isr = mc_gp10b_ltc_isr,
 	},
 	.debug = {
 		.show_dump = gk20a_debug_show_dump,
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -204,7 +204,7 @@ struct gpu_ops {
 		void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr);
 		void (*set_enabled)(struct gk20a *g, bool enabled);
 		void (*init_fs_state)(struct gk20a *g);
-		void (*isr)(struct gk20a *g);
+		void (*isr)(struct gk20a *g, unsigned int ltc);
 		u32 (*cbc_fix_config)(struct gk20a *g, int base);
 		void (*flush)(struct gk20a *g);
 		void (*intr_en_illegal_compstat)(struct gk20a *g, bool enable);
@@ -1186,6 +1186,7 @@ struct gpu_ops {
 		void (*fbpa_isr)(struct gk20a *g);
 		u32 (*reset_mask)(struct gk20a *g, enum nvgpu_unit unit);
 		void (*fb_reset)(struct gk20a *g);
 		void (*ltc_isr)(struct gk20a *g);
 	} mc;
 	struct {
 		void (*show_dump)(struct gk20a *g,
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -326,7 +326,7 @@ static const struct gpu_ops tu104_ops = {
 		.init_fs_state = ltc_tu104_init_fs_state,
 		.init_comptags = ltc_tu104_init_comptags,
 		.cbc_ctrl = ltc_tu104_cbc_ctrl,
-		.isr = tu104_ltc_isr,
+		.isr = gv11b_ltc_isr,
 		.cbc_fix_config = NULL,
 		.flush = gm20b_flush_ltc,
 		.set_enabled = gp10b_ltc_set_enabled,
@@ -821,6 +821,7 @@ static const struct gpu_ops tu104_ops = {
 		.reset_mask = gv100_mc_reset_mask,
 		.is_enabled = gm20b_mc_is_enabled,
 		.fb_reset = NULL,
 		.ltc_isr = mc_tu104_ltc_isr,
 	},
 	.debug = {
 		.show_dump = gk20a_debug_show_dump,