gpu: nvgpu: ioctl for flushing GPU L2

CUDA devtools need to be able to flush the GPU's L2 cache out of
band, so they cannot rely on submitting GPU methods through a
channel. This change adds an nvgpu_gpu_ioctl that flushes, and
optionally invalidates, the GPU's L2 cache and can also flush the fb.
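
A minimal user-space sketch of the new interface follows. It is
illustrative only: the device node path, the header name, and the
helper function are assumptions made for the example, not part of
this change.

	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/nvgpu.h>  /* NVGPU_GPU_IOCTL_FLUSH_L2, nvgpu_gpu_l2_fb_args */

	/* Hypothetical helper: flush (and invalidate) GPU L2, then flush fb. */
	static int flush_gpu_l2(const char *ctrl_node)
	{
		struct nvgpu_gpu_l2_fb_args args;
		int fd, err;

		fd = open(ctrl_node, O_RDWR);  /* e.g. "/dev/nvhost-ctrl-gpu" */
		if (fd < 0)
			return -1;

		memset(&args, 0, sizeof(args));
		args.l2_flush = 1;      /* write back dirty L2 lines */
		args.l2_invalidate = 1; /* also drop them so later reads refetch */
		args.fb_flush = 1;      /* flush the fb interface as well */

		err = ioctl(fd, NVGPU_GPU_IOCTL_FLUSH_L2, &args);
		close(fd);
		return err;
	}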

Change-Id: Ib06a0bc8d8880ffbfe4b056518cc3c3df0cc4988
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Signed-off-by: Mayank Kaushik <mkaushik@nvidia.com>
Reviewed-on: http://git-master/r/671809
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
commit 8d1ab756ed
parent ac205be1d3
Author: sujeet baranwal
Date: 2015-02-19 10:34:51 -08:00
Committed by: Dan Willemsen

5 files changed, 38 insertions(+), 10 deletions(-)

View File

@@ -243,6 +243,20 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
 	return err;
 }
 
+static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
+		struct nvgpu_gpu_l2_fb_args *args)
+{
+	int err = 0;
+
+	if (args->l2_flush)
+		g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false);
+
+	if (args->fb_flush)
+		g->ops.mm.fb_flush(g);
+
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -423,6 +437,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = gk20a_channel_open_ioctl(g,
 			(struct nvgpu_channel_open_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_FLUSH_L2:
+		err = nvgpu_gpu_ioctl_l2_fb_ops(g,
+			(struct nvgpu_gpu_l2_fb_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;

View File

@@ -412,19 +412,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 	case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
 		err = dbg_bind_channel_gk20a(dbg_s,
			     (struct nvgpu_dbg_gpu_bind_channel_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_REG_OPS:
 		err = nvgpu_ioctl_channel_reg_ops(dbg_s,
			   (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_POWERGATE:
 		err = nvgpu_ioctl_powergate_gk20a(dbg_s,
			   (struct nvgpu_dbg_gpu_powergate_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL:
@@ -460,6 +457,8 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		break;
 	}
 
+	gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
+
 	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
 		err = copy_to_user((void __user *)arg,
				   buf, _IOC_SIZE(cmd));
@@ -741,6 +740,8 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 	bool ch_is_curr_ctx;
 	int err = 0, action = args->mode;
 
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode);
+
 	mutex_lock(&g->dbg_sessions_lock);
 
 	/* Suspend GPU context switching */

View File

@@ -1502,7 +1502,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
 	ctx_header_words >>= 2;
 
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	for (i = 0; i < ctx_header_words; i++) {
 		data = gk20a_mem_rd32(ctx_ptr, i);
@@ -1565,7 +1565,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 
 	/* Channel gr_ctx buffer is gpu cacheable.
	   Flush and invalidate before cpu update. */
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
@@ -1605,7 +1605,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	/* Channel gr_ctx buffer is gpu cacheable.
	   Flush and invalidate before cpu update. */
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
@@ -7003,7 +7003,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			goto cleanup;
 		}
 
-		gk20a_mm_l2_flush(g, true);
+		g->ops.mm.l2_flush(g, true);
 
 		/* write to appropriate place in context image,
		 * first have to figure out where that really is */

View File

@@ -697,7 +697,7 @@ static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
 		ok &= !err;
 	}
 
-	gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d\n",
+	gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
		   *ctx_wr_count, *ctx_rd_count);
 
 	return ok;

View File

@@ -242,6 +242,14 @@ struct nvgpu_gpu_open_channel_args {
 	__s32 channel_fd;
 };
 
+/* L2 cache writeback, optionally invalidate clean lines and flush fb */
+struct nvgpu_gpu_l2_fb_args {
+	__u32 l2_flush:1;
+	__u32 l2_invalidate:1;
+	__u32 fb_flush:1;
+	__u32 reserved;
+} __packed;
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -264,13 +272,14 @@ struct nvgpu_gpu_open_channel_args {
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 10, struct nvgpu_gpu_get_tpc_masks_args)
 #define NVGPU_GPU_IOCTL_OPEN_CHANNEL \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 11, struct nvgpu_gpu_open_channel_args)
+#define NVGPU_GPU_IOCTL_FLUSH_L2 \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 12, struct nvgpu_gpu_l2_fb_args)
 
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_OPEN_CHANNEL)
+	_IOC_NR(NVGPU_GPU_IOCTL_FLUSH_L2)
 
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
 
 /*
  * /dev/nvhost-tsg-gpu device
  *