mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 09:57:08 +03:00
gpu: nvgpu: ioctl for flushing GPU L2
CUDA devtools need to be able to flush the GPU's cache in a sideband
fashion and so cannot use methods. This change implements an
nvgpu_gpu_ioctl to flush and optionally invalidate the GPU's L2 cache
and flush fb.

Change-Id: Ib06a0bc8d8880ffbfe4b056518cc3c3df0cc4988
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Signed-off-by: Mayank Kaushik <mkaushik@nvidia.com>
Reviewed-on: http://git-master/r/671809
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
committed by Dan Willemsen
parent ac205be1d3
commit 8d1ab756ed
@@ -243,6 +243,20 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
 	return err;
 }
 
+static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
+		struct nvgpu_gpu_l2_fb_args *args)
+{
+	int err = 0;
+
+	if (args->l2_flush)
+		g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false);
+
+	if (args->fb_flush)
+		g->ops.mm.fb_flush(g);
+
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
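The new handler dispatches through g->ops.mm instead of calling gk20a_mm_l2_flush() directly, letting each chip install its own cache-maintenance hooks (the later hunks convert the remaining direct callers the same way). A minimal sketch of that indirection follows; only the two op names appear in this change, so the signatures and surrounding struct layout are assumptions:

/* Sketch only: op names are from the diff, signatures are assumed. */
#include <stdbool.h>

struct gk20a;	/* opaque device handle */

struct gpu_ops_sketch {
	struct {
		/* write back dirty L2 lines; also invalidate when requested */
		void (*l2_flush)(struct gk20a *g, bool invalidate);
		/* flush pending writes out to the framebuffer interface */
		void (*fb_flush)(struct gk20a *g);
	} mm;
};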
@@ -423,6 +437,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = gk20a_channel_open_ioctl(g,
 			(struct nvgpu_channel_open_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_FLUSH_L2:
+		err = nvgpu_gpu_ioctl_l2_fb_ops(g,
+			(struct nvgpu_gpu_l2_fb_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
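For context, a userspace caller would reach the new NVGPU_GPU_IOCTL_FLUSH_L2 case roughly as sketched below. This is a hedged illustration, not part of the change: the /dev/nvhost-ctrl-gpu node path and the <linux/nvgpu.h> header location are assumptions.

/* Hypothetical usage sketch of the new ioctl. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* assumed location of the nvgpu uapi */

static int flush_gpu_l2(int invalidate)
{
	struct nvgpu_gpu_l2_fb_args args;
	int fd, err;

	memset(&args, 0, sizeof(args));
	args.l2_flush = 1;		   /* write back dirty L2 lines */
	args.l2_invalidate = !!invalidate; /* optionally drop clean lines */
	args.fb_flush = 1;		   /* flush the fb interface as well */

	fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);	/* assumed node */
	if (fd < 0)
		return -1;

	err = ioctl(fd, NVGPU_GPU_IOCTL_FLUSH_L2, &args);
	close(fd);
	return err;
}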
@@ -412,19 +412,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 	case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
 		err = dbg_bind_channel_gk20a(dbg_s,
 				(struct nvgpu_dbg_gpu_bind_channel_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_REG_OPS:
 		err = nvgpu_ioctl_channel_reg_ops(dbg_s,
 				(struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_POWERGATE:
 		err = nvgpu_ioctl_powergate_gk20a(dbg_s,
 				(struct nvgpu_dbg_gpu_powergate_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL:
@@ -460,6 +457,8 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		break;
 	}
 
+	gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
+
 	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
 		err = copy_to_user((void __user *)arg,
 				   buf, _IOC_SIZE(cmd));
@@ -741,6 +740,8 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 	bool ch_is_curr_ctx;
 	int err = 0, action = args->mode;
 
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode);
+
 	mutex_lock(&g->dbg_sessions_lock);
 
 	/* Suspend GPU context switching */
@@ -1502,7 +1502,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
 	ctx_header_words >>= 2;
 
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	for (i = 0; i < ctx_header_words; i++) {
 		data = gk20a_mem_rd32(ctx_ptr, i);
@@ -1565,7 +1565,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
 			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
@@ -1605,7 +1605,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
 			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
@@ -7003,7 +7003,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			goto cleanup;
 		}
 
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	/* write to appropriate place in context image,
 	 * first have to figure out where that really is */
@@ -697,7 +697,7 @@ static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
 		ok &= !err;
 	}
 
-	gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d\n",
+	gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
 		  *ctx_wr_count, *ctx_rd_count);
 
 	return ok;
@@ -242,6 +242,14 @@ struct nvgpu_gpu_open_channel_args {
 	__s32 channel_fd;
 };
 
+/* L2 cache writeback, optionally invalidate clean lines and flush fb */
+struct nvgpu_gpu_l2_fb_args {
+	__u32 l2_flush:1;
+	__u32 l2_invalidate:1;
+	__u32 fb_flush:1;
+	__u32 reserved;
+} __packed;
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
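The three flag bits above share a single __u32 and the reserved word pads the argument block to eight bytes. A quick illustrative size check, assuming the usual bitfield layout; the struct is redeclared here under a _sketch name precisely because this duplicate is not part of the change:

#include <stdint.h>

/* Redeclaration for illustration only; mirrors the uapi struct above. */
struct nvgpu_gpu_l2_fb_args_sketch {
	uint32_t l2_flush:1;
	uint32_t l2_invalidate:1;
	uint32_t fb_flush:1;
	uint32_t reserved;
} __attribute__((packed));

/* Three flag bits pack into one 32-bit word; reserved adds the second. */
_Static_assert(sizeof(struct nvgpu_gpu_l2_fb_args_sketch) == 8,
	       "flag word plus reserved word should be 8 bytes");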
@@ -264,13 +272,14 @@ struct nvgpu_gpu_open_channel_args {
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 10, struct nvgpu_gpu_get_tpc_masks_args)
 #define NVGPU_GPU_IOCTL_OPEN_CHANNEL \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 11, struct nvgpu_gpu_open_channel_args)
+#define NVGPU_GPU_IOCTL_FLUSH_L2 \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 12, struct nvgpu_gpu_l2_fb_args)
+
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_OPEN_CHANNEL)
+	_IOC_NR(NVGPU_GPU_IOCTL_FLUSH_L2)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
 
 
 /*
  * /dev/nvhost-tsg-gpu device
  *
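Since NVGPU_GPU_IOCTL_LAST now resolves to the FLUSH_L2 command number, it is worth noting the companion invariant: if the handler's copy buffer is sized by NVGPU_GPU_IOCTL_MAX_ARG_SIZE, as the define's name suggests, every command's argument struct must fit within it. A hedged compile-time sketch (header path assumed):

#include <linux/nvgpu.h>	/* assumed uapi header location */

/* The new args struct must fit the ioctl dispatch buffer. */
_Static_assert(sizeof(struct nvgpu_gpu_l2_fb_args) <=
	       NVGPU_GPU_IOCTL_MAX_ARG_SIZE,
	       "nvgpu_gpu_l2_fb_args must fit the gpu ioctl arg buffer");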