From bfa20f62c65e3f99d1a281cec4a75d6dae747512 Mon Sep 17 00:00:00 2001
From: Sagar Kamble <skamble@nvidia.com>
Date: Tue, 17 May 2022 18:28:46 +0530
Subject: [PATCH] gpu: nvgpu: add/remove l2 cache flush when updating the ctx
 buffers

The gr ctx buffer is non-cacheable, hence there is no need to do an L2
cache flush when updating the buffer. Remove the flushes.

The pm ctx buffer is cacheable, hence add an L2 flush in the function
nvgpu_profiler_quiesce_hwpm_streamout_non_resident since it
updates the buffer.

Bug 3677982

Change-Id: I0c15ec7a7f8fa250af1d25891122acc24443a872
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2713916
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/gr/ctx.c            | 41 ++------------------
 drivers/gpu/nvgpu/common/gr/global_ctx.c     |  7 ----
 drivers/gpu/nvgpu/common/gr/subctx.c         |  6 ---
 drivers/gpu/nvgpu/common/profiler/profiler.c |  9 +++++
 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c       |  8 ----
 5 files changed, 12 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c
index bc8612a14..2d4976f64 100644
--- a/drivers/gpu/nvgpu/common/gr/ctx.c
+++ b/drivers/gpu/nvgpu/common/gr/ctx.c
@@ -749,20 +749,12 @@ u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx)
 
 int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
 {
-	int err;
-
 	nvgpu_log(g, gpu_dbg_gr, " ");
 
-	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-	if (err != 0) {
-		nvgpu_err(g, "l2_flush failed");
-		return err;
-	}
-
 	g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, &gr_ctx->mem);
 	g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &gr_ctx->mem, 0);
 
-	return err;
+	return 0;
 }
 
 int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
@@ -1061,13 +1053,6 @@ u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx)
 u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
 {
 	if (!gr_ctx->ctx_id_valid) {
-		/* Channel gr_ctx buffer is gpu cacheable.
-		   Flush and invalidate before cpu update. */
-		if (nvgpu_pg_elpg_ms_protected_call(g,
-					g->ops.mm.cache.l2_flush(g, true)) != 0) {
-			nvgpu_err(g, "l2_flush failed");
-		}
-
 		gr_ctx->ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g,
 					&gr_ctx->mem);
 		gr_ctx->ctx_id_valid = true;
@@ -1106,24 +1091,14 @@ bool nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close(
 int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 	bool enable)
 {
-	int err;
-
 	if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
 		nvgpu_err(g, "no graphics context allocated");
 		return -EFAULT;
 	}
 
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush and invalidate before cpu update. */
-	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-	if (err != 0) {
-		nvgpu_err(g, "l2_flush failed");
-		return err;
-	}
-
 	g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, &gr_ctx->mem, enable);
 
-	return err;
+	return 0;
 }
 
 int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
@@ -1188,16 +1163,6 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 	bool set_pm_ptr)
 {
-	int err;
-
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush and invalidate before cpu update. */
-	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-	if (err != 0) {
-		nvgpu_err(g, "l2_flush failed");
-		return err;
-	}
-
 	g->ops.gr.ctxsw_prog.set_pm_mode(g, &gr_ctx->mem,
 			gr_ctx->pm_ctx.pm_mode);
 	if (set_pm_ptr) {
@@ -1205,6 +1170,6 @@ int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 			gr_ctx->pm_ctx.gpu_va);
 	}
 
-	return err;
+	return 0;
 }
 #endif /* CONFIG_NVGPU_DEBUGGER */
diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx.c b/drivers/gpu/nvgpu/common/gr/global_ctx.c
index 62ab255d3..cae66ed92 100644
--- a/drivers/gpu/nvgpu/common/gr/global_ctx.c
+++ b/drivers/gpu/nvgpu/common/gr/global_ctx.c
@@ -450,13 +450,6 @@ void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g,
 	struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image,
 	struct nvgpu_mem *target_mem)
 {
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush and invalidate before cpu update. */
-	if (nvgpu_pg_elpg_ms_protected_call(g,
-				g->ops.mm.cache.l2_flush(g, true)) != 0) {
-		nvgpu_err(g, "l2_flush failed");
-	}
-
 	nvgpu_mem_wr_n(g, target_mem, 0, local_golden_image->context,
 		nvgpu_safe_cast_u64_to_u32(local_golden_image->size));
 
diff --git a/drivers/gpu/nvgpu/common/gr/subctx.c b/drivers/gpu/nvgpu/common/gr/subctx.c
index 8a6e79891..eaf614a10 100644
--- a/drivers/gpu/nvgpu/common/gr/subctx.c
+++ b/drivers/gpu/nvgpu/common/gr/subctx.c
@@ -84,12 +84,6 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
 	struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va)
 {
 	struct nvgpu_mem *ctxheader = &subctx->ctx_header;
-	int err = 0;
-
-	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-	if (err != 0) {
-		nvgpu_err(g, "l2_flush failed");
-	}
 
 #ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP
 	/* set priv access map */
diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c
index 678983678..8b277bae1 100644
--- a/drivers/gpu/nvgpu/common/profiler/profiler.c
+++ b/drivers/gpu/nvgpu/common/profiler/profiler.c
@@ -38,6 +38,7 @@
 #include <nvgpu/sort.h>
 #include <nvgpu/gr/gr_instances.h>
 #include <nvgpu/grmgr.h>
+#include <nvgpu/power_features/pg.h>
 
 static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof);
 static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof);
@@ -496,6 +497,7 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct gk20a *g,
 		struct nvgpu_tsg *tsg)
 {
 	struct nvgpu_mem *pm_ctx_mem;
+	int err;
 
 	nvgpu_log(g, gpu_dbg_prof,
 		"HWPM streamout quiesce in non-resident state started");
@@ -505,11 +507,18 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct gk20a *g,
 	}
 
 	pm_ctx_mem = nvgpu_gr_ctx_get_pm_ctx_mem(tsg->gr_ctx);
+
 	if (pm_ctx_mem == NULL) {
 		nvgpu_err(g, "No PM context");
 		return -EINVAL;
 	}
 
+	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
+	if (err != 0) {
+		nvgpu_err(g, "l2_flush failed");
+		return err;
+	}
+
 	nvgpu_memset(g, pm_ctx_mem, 0U, 0U, pm_ctx_mem->size);
 	nvgpu_log(g, gpu_dbg_prof,
 		"HWPM streamout quiesce in non-resident state successfull");
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 0cd47be85..e81e05a94 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1947,14 +1947,6 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context(struct dbg_session_gk20a *dbg_s,
 		goto done;
 	}
 
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush and invalidate before cpu update. */
-	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-	if (err != 0) {
-		nvgpu_err(g, "l2_flush failed");
-		goto done;
-	}
-
 	err = nvgpu_dbg_get_context_buffer(g, ctx_mem, user_buffer, size);
 
 done: