gpu: nvgpu: Implement ELPG flush for gm20b

ELPG flush is initiated from a common broadcast register, but must be
waited on via per-L2 registers. Split gk20a and gm20b versions of
the flush.

Change-Id: I75c2d65e8da311b50d35bee70308b60464ec2d4d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/401545
Reviewed-by: Automatic_Commit_Validation_User
Author: Terje Bergstrom
Date: 2014-04-25 15:00:54 +03:00
Committed by: Dan Willemsen
Parent: 24fc5e36a7
Commit: 1c9aaa1eaf

4 changed files with 97 additions and 37 deletions
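
In short: the flush is still kicked off by a single write to the broadcast LTCS register, but completion is now polled per LTC unit, stepping from the LTC0 register by the LTC0-to-LTC1 stride. A minimal, self-contained model of that wait loop is sketched below; the register file is simulated, and LTC_COUNT, read_elpg_status() and wait_elpg_flush_done() are illustrative names, not nvgpu APIs.

/*
 * Standalone model of the per-L2 ELPG flush wait introduced for gm20b.
 * Hardware access is simulated; in the driver, the per-unit status register
 * for LTC i is read at ltc_ltc0_ltss_g_elpg_r() + stride * i, where
 * stride = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r().
 */
#include <stdbool.h>
#include <stdio.h>

#define LTC_COUNT 2		/* hypothetical number of L2 (LTC) units */

/* Simulated per-LTC flush state: >0 means "still pending" for that many polls. */
static unsigned int pending_polls[LTC_COUNT];

/* Stand-in for reading the per-unit G_ELPG register; bit 0 = flush pending. */
static unsigned int read_elpg_status(int ltc)
{
	if (pending_polls[ltc])
		pending_polls[ltc]--;
	return pending_polls[ltc] ? 0x1u : 0x0u;
}

/* Poll every LTC unit until each one reports its flush as complete. */
static bool wait_elpg_flush_done(void)
{
	bool done[LTC_COUNT] = { false };
	int num_done = 0;
	int retry = 100;

	do {
		for (int i = 0; i < LTC_COUNT; i++) {
			if (done[i])
				continue;
			if (read_elpg_status(i) & 0x1) {
				printf("LTC%d: flush still pending\n", i);
			} else {
				done[i] = true;
				num_done++;
			}
		}
		if (num_done < LTC_COUNT)
			retry--;	/* the driver sleeps here: usleep_range(20, 40) */
		else
			break;
	} while (retry >= 0);

	return retry >= 0;
}

int main(void)
{
	/* Pretend a single broadcast write has started the flush on every unit;
	 * unit i takes i + 1 polls to finish. */
	for (int i = 0; i < LTC_COUNT; i++)
		pending_polls[i] = (unsigned int)(i + 1);

	printf("ELPG flush %s\n", wait_elpg_flush_done() ? "complete" : "timed out");
	return 0;
}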


@@ -313,37 +313,3 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
 			0, max_comptag_lines - 1);
 }
 
-/* Flushes the compression bit cache as well as "data".
- * Note: the name here is a bit of a misnomer. ELPG uses this
- * internally... but ELPG doesn't have to be on to do it manually.
- */
-static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
-{
-	u32 data;
-	s32 retry = 100;
-
-	gk20a_dbg_fn("");
-
-	/* Make sure all previous writes are committed to the L2. There's no
-	   guarantee that writes are to DRAM. This will be a sysmembar internal
-	   to the L2. */
-	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
-		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
-
-	do {
-		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
-
-		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
-		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
-			gk20a_dbg_info("g_elpg_flush 0x%x", data);
-			retry--;
-			usleep_range(20, 40);
-		} else
-			break;
-	} while (retry >= 0 || !tegra_platform_is_silicon());
-
-	if (retry < 0)
-		gk20a_warn(dev_from_gk20a(g),
-			   "g_elpg_flush too many retries");
-}


@@ -212,6 +212,40 @@ void gk20a_ltc_isr(struct gk20a *g)
 	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
 
+/* Flushes the compression bit cache as well as "data".
+ * Note: the name here is a bit of a misnomer. ELPG uses this
+ * internally... but ELPG doesn't have to be on to do it manually.
+ */
+static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
+{
+	u32 data;
+	s32 retry = 100;
+
+	gk20a_dbg_fn("");
+
+	/* Make sure all previous writes are committed to the L2. There's no
+	   guarantee that writes are to DRAM. This will be a sysmembar internal
+	   to the L2. */
+	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
+
+	do {
+		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
+
+		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
+		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
+			gk20a_dbg_info("g_elpg_flush 0x%x", data);
+			retry--;
+			usleep_range(20, 40);
+		} else
+			break;
+	} while (retry >= 0 || !tegra_platform_is_silicon());
+
+	if (retry < 0)
+		gk20a_warn(dev_from_gk20a(g),
+			   "g_elpg_flush too many retries");
+}
+
 void gk20a_init_ltc(struct gpu_ops *gops)
 {
 	gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;


@@ -96,11 +96,11 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
 {
-	return 0x1;
+	return 0x1;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
 {
-	return 0x2;
+	return 0x2;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
 {
{
@@ -258,6 +258,22 @@ static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
 {
 	return 0x1;
 }
+static inline u32 ltc_ltc1_ltss_g_elpg_r(void)
+{
+	return 0x00142214;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
+{
+	return 0x1;
+}
 static inline u32 ltc_ltc0_ltss_intr_r(void)
 {
 	return 0x0014020c;


@@ -193,6 +193,50 @@ void gm20b_ltc_isr(struct gk20a *g)
 	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
 
+static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
+{
+	u32 data;
+	bool done[g->ltc_count];
+	s32 retry = 100;
+	int i;
+	int num_done = 0;
+	u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();
+
+	gk20a_dbg_fn("");
+
+	for (i = 0; i < g->ltc_count; i++)
+		done[i] = 0;
+
+	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
+	do {
+		for (i = 0; i < g->ltc_count; i++) {
+			if (done[i])
+				continue;
+
+			data = gk20a_readl(g,
+					ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);
+
+			if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
+				gk20a_dbg_info("g_elpg_flush 0x%x", data);
+			} else {
+				done[i] = 1;
+				num_done++;
+			}
+		}
+
+		if (num_done < g->ltc_count) {
+			retry--;
+			usleep_range(20, 40);
+		} else
+			break;
+	} while (retry >= 0 || !tegra_platform_is_silicon());
+
+	if (retry < 0)
+		gk20a_warn(dev_from_gk20a(g),
+			   "g_elpg_flush too many retries");
+}
+
 void gm20b_init_ltc(struct gpu_ops *gops)
 {
 	/* Gk20a reused ops. */
@@ -209,6 +253,6 @@ void gm20b_init_ltc(struct gpu_ops *gops)
 	gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
 	gops->ltc.init_comptags = gm20b_ltc_init_comptags;
 	gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
-	gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked;
+	gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
 	gops->ltc.isr = gm20b_ltc_isr;
 }
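
With this change the flush is always reached through the per-chip function table, so gm20b callers pick up the new implementation without any call-site changes. Assuming the usual nvgpu layout where struct gk20a embeds the ops table as g->ops, a call site would look roughly like this:

	/* Dispatch through the per-chip ops table; gk20a and gm20b each
	 * install their own elpg_flush handler above. */
	g->ops.ltc.elpg_flush(g);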