gpu: nvgpu: add/remove l2 cache flush when updating the ctx buffers

gr ctx buffer is non-cacheable, hence there is no need to do an L2 cache
flush when updating the buffer. Remove the flushes.

pm ctx buffer is cacheable, hence add an L2 cache flush in
nvgpu_profiler_quiesce_hwpm_streamout_non_resident, since it updates
the buffer.
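
For reference, here is a minimal sketch of the flush-before-CPU-update
pattern that the profiler path now follows. It is an illustration only,
not the actual driver function: quiesce_pm_ctx_cpu_update() is a made-up
wrapper name, while nvgpu_pg_elpg_ms_protected_call(),
g->ops.mm.cache.l2_flush(), nvgpu_memset() and nvgpu_err() are the calls
used in the diff below.

static int quiesce_pm_ctx_cpu_update(struct gk20a *g,
                struct nvgpu_mem *pm_ctx_mem)
{
        int err;

        /*
         * Simplified sketch: the pm ctx buffer is GPU cacheable, so flush
         * (and invalidate) L2 before the CPU touches it. Otherwise dirty
         * GPU cache lines could later overwrite the CPU update, or the GPU
         * could keep reading stale lines instead of the updated memory.
         */
        err = nvgpu_pg_elpg_ms_protected_call(g,
                        g->ops.mm.cache.l2_flush(g, true));
        if (err != 0) {
                nvgpu_err(g, "l2_flush failed");
                return err;
        }

        /* Only now is the CPU-side update of the buffer safe. */
        nvgpu_memset(g, pm_ctx_mem, 0U, 0U, pm_ctx_mem->size);

        return 0;
}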

Bug 3677982

Change-Id: I0c15ec7a7f8fa250af1d25891122acc24443a872
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2713916
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Sagar Kamble authored 2022-05-17 18:28:46 +05:30, committed by mobile promotions
commit bfa20f62c6, parent 65e7baf856
5 changed files with 12 additions and 59 deletions


@@ -749,20 +749,12 @@ u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx)
 int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
 {
-        int err;
         nvgpu_log(g, gpu_dbg_gr, " ");
-        err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-        if (err != 0) {
-                nvgpu_err(g, "l2_flush failed");
-                return err;
-        }
         g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, &gr_ctx->mem);
         g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &gr_ctx->mem, 0);
-        return err;
+        return 0;
 }
 int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
@@ -1061,13 +1053,6 @@ u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx)
 u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
 {
         if (!gr_ctx->ctx_id_valid) {
-                /* Channel gr_ctx buffer is gpu cacheable.
-                   Flush and invalidate before cpu update. */
-                if (nvgpu_pg_elpg_ms_protected_call(g,
-                                g->ops.mm.cache.l2_flush(g, true)) != 0) {
-                        nvgpu_err(g, "l2_flush failed");
-                }
                 gr_ctx->ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g,
                                 &gr_ctx->mem);
                 gr_ctx->ctx_id_valid = true;
@@ -1106,24 +1091,14 @@ bool nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close(
 int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
         bool enable)
 {
-        int err;
         if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
                 nvgpu_err(g, "no graphics context allocated");
                 return -EFAULT;
         }
-        /* Channel gr_ctx buffer is gpu cacheable.
-           Flush and invalidate before cpu update. */
-        err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-        if (err != 0) {
-                nvgpu_err(g, "l2_flush failed");
-                return err;
-        }
         g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, &gr_ctx->mem, enable);
-        return err;
+        return 0;
 }
 int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
@@ -1188,16 +1163,6 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
         bool set_pm_ptr)
 {
-        int err;
-        /* Channel gr_ctx buffer is gpu cacheable.
-           Flush and invalidate before cpu update. */
-        err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-        if (err != 0) {
-                nvgpu_err(g, "l2_flush failed");
-                return err;
-        }
         g->ops.gr.ctxsw_prog.set_pm_mode(g, &gr_ctx->mem,
                 gr_ctx->pm_ctx.pm_mode);
         if (set_pm_ptr) {
@@ -1205,6 +1170,6 @@ int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
                         gr_ctx->pm_ctx.gpu_va);
         }
-        return err;
+        return 0;
 }
 #endif /* CONFIG_NVGPU_DEBUGGER */


@@ -450,13 +450,6 @@ void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g,
         struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image,
         struct nvgpu_mem *target_mem)
 {
-        /* Channel gr_ctx buffer is gpu cacheable.
-           Flush and invalidate before cpu update. */
-        if (nvgpu_pg_elpg_ms_protected_call(g,
-                        g->ops.mm.cache.l2_flush(g, true)) != 0) {
-                nvgpu_err(g, "l2_flush failed");
-        }
         nvgpu_mem_wr_n(g, target_mem, 0, local_golden_image->context,
                 nvgpu_safe_cast_u64_to_u32(local_golden_image->size));


@@ -84,12 +84,6 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
         struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va)
 {
         struct nvgpu_mem *ctxheader = &subctx->ctx_header;
-        int err = 0;
-        err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-        if (err != 0) {
-                nvgpu_err(g, "l2_flush failed");
-        }
 #ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP
         /* set priv access map */


@@ -38,6 +38,7 @@
 #include <nvgpu/sort.h>
 #include <nvgpu/gr/gr_instances.h>
 #include <nvgpu/grmgr.h>
+#include <nvgpu/power_features/pg.h>
 static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof);
 static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof);
@@ -496,6 +497,7 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct gk20a *g,
         struct nvgpu_tsg *tsg)
 {
         struct nvgpu_mem *pm_ctx_mem;
+        int err;
         nvgpu_log(g, gpu_dbg_prof,
                 "HWPM streamout quiesce in non-resident state started");
@@ -505,11 +507,18 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct gk20a *g,
         }
         pm_ctx_mem = nvgpu_gr_ctx_get_pm_ctx_mem(tsg->gr_ctx);
         if (pm_ctx_mem == NULL) {
                 nvgpu_err(g, "No PM context");
                 return -EINVAL;
         }
+        err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
+        if (err != 0) {
+                nvgpu_err(g, "l2_flush failed");
+                return err;
+        }
         nvgpu_memset(g, pm_ctx_mem, 0U, 0U, pm_ctx_mem->size);
         nvgpu_log(g, gpu_dbg_prof,
                 "HWPM streamout quiesce in non-resident state successfull");


@@ -1947,14 +1947,6 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context(struct dbg_session_gk20a *dbg_s,
                 goto done;
         }
-        /* Channel gr_ctx buffer is gpu cacheable.
-           Flush and invalidate before cpu update. */
-        err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-        if (err != 0) {
-                nvgpu_err(g, "l2_flush failed");
-                goto done;
-        }
         err = nvgpu_dbg_get_context_buffer(g, ctx_mem, user_buffer, size);
 done: