mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 09:57:08 +03:00
gpu: nvgpu: ioctl for flushing GPU L2
CUDA devtools need to be able to flush the GPU's cache in a sideband
fashion and so cannot use methods. This change implements an
nvgpu_gpu_ioctl to flush and optionally invalidate the GPU's L2 cache
and flush fb.

Change-Id: Ib06a0bc8d8880ffbfe4b056518cc3c3df0cc4988
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Signed-off-by: Mayank Kaushik <mkaushik@nvidia.com>
Reviewed-on: http://git-master/r/671809
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
committed by Dan Willemsen
parent ac205be1d3
commit 8d1ab756ed
@@ -243,6 +243,20 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
 	return err;
 }
 
+static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
+		struct nvgpu_gpu_l2_fb_args *args)
+{
+	int err = 0;
+
+	if (args->l2_flush)
+		g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false);
+
+	if (args->fb_flush)
+		g->ops.mm.fb_flush(g);
+
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
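The new handler dispatches through g->ops.mm instead of calling gk20a_mm_l2_flush() directly, letting each chip install its own cache-maintenance hooks (the later hunks convert the remaining direct callers the same way). A minimal sketch of that indirection follows; only the two op names appear in this change, so the signatures and surrounding struct layout are assumptions:

/* Sketch only: op names are from the diff, signatures are assumed. */
#include <stdbool.h>

struct gk20a;	/* opaque device handle */

struct gpu_ops_sketch {
	struct {
		/* write back dirty L2 lines; also invalidate when requested */
		void (*l2_flush)(struct gk20a *g, bool invalidate);
		/* flush pending writes out to the framebuffer interface */
		void (*fb_flush)(struct gk20a *g);
	} mm;
};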
@@ -423,6 +437,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = gk20a_channel_open_ioctl(g,
 			(struct nvgpu_channel_open_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_FLUSH_L2:
+		err = nvgpu_gpu_ioctl_l2_fb_ops(g,
+			(struct nvgpu_gpu_l2_fb_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
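For context, a userspace caller would reach the new NVGPU_GPU_IOCTL_FLUSH_L2 case roughly as sketched below. This is a hedged illustration, not part of the change: the /dev/nvhost-ctrl-gpu node path and the <linux/nvgpu.h> header location are assumptions.

/* Hypothetical usage sketch of the new ioctl. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* assumed location of the nvgpu uapi */

static int flush_gpu_l2(int invalidate)
{
	struct nvgpu_gpu_l2_fb_args args;
	int fd, err;

	memset(&args, 0, sizeof(args));
	args.l2_flush = 1;		   /* write back dirty L2 lines */
	args.l2_invalidate = !!invalidate; /* optionally drop clean lines */
	args.fb_flush = 1;		   /* flush the fb interface as well */

	fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);	/* assumed node */
	if (fd < 0)
		return -1;

	err = ioctl(fd, NVGPU_GPU_IOCTL_FLUSH_L2, &args);
	close(fd);
	return err;
}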
@@ -412,19 +412,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 	case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
 		err = dbg_bind_channel_gk20a(dbg_s,
 				(struct nvgpu_dbg_gpu_bind_channel_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_REG_OPS:
 		err = nvgpu_ioctl_channel_reg_ops(dbg_s,
 				(struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_POWERGATE:
 		err = nvgpu_ioctl_powergate_gk20a(dbg_s,
 				(struct nvgpu_dbg_gpu_powergate_args *)buf);
-		gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL:
@@ -460,6 +457,8 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		break;
 	}
 
+	gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
+
 	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
 		err = copy_to_user((void __user *)arg,
 				   buf, _IOC_SIZE(cmd));
@@ -741,6 +740,8 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 	bool ch_is_curr_ctx;
 	int err = 0, action = args->mode;
 
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode);
+
 	mutex_lock(&g->dbg_sessions_lock);
 
 	/* Suspend GPU context switching */
@@ -1502,7 +1502,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
 	ctx_header_words >>= 2;
 
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	for (i = 0; i < ctx_header_words; i++) {
 		data = gk20a_mem_rd32(ctx_ptr, i);
@@ -1565,7 +1565,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
 			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
@@ -1605,7 +1605,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
 			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
@@ -7003,7 +7003,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			goto cleanup;
 		}
 
-	gk20a_mm_l2_flush(g, true);
+	g->ops.mm.l2_flush(g, true);
 
 	/* write to appropriate place in context image,
 	 * first have to figure out where that really is */
@@ -697,7 +697,7 @@ static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
 		ok &= !err;
 	}
 
-	gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d\n",
+	gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
 		  *ctx_wr_count, *ctx_rd_count);
 
 	return ok;
@@ -242,6 +242,14 @@ struct nvgpu_gpu_open_channel_args {
 	__s32 channel_fd;
 };
 
+/* L2 cache writeback, optionally invalidate clean lines and flush fb */
+struct nvgpu_gpu_l2_fb_args {
+	__u32 l2_flush:1;
+	__u32 l2_invalidate:1;
+	__u32 fb_flush:1;
+	__u32 reserved;
+} __packed;
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
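The three flag bits above share a single __u32 and the reserved word pads the argument block to eight bytes. A quick illustrative size check, assuming the usual bitfield layout; the struct is redeclared here under a _sketch name precisely because this duplicate is not part of the change:

#include <stdint.h>

/* Redeclaration for illustration only; mirrors the uapi struct above. */
struct nvgpu_gpu_l2_fb_args_sketch {
	uint32_t l2_flush:1;
	uint32_t l2_invalidate:1;
	uint32_t fb_flush:1;
	uint32_t reserved;
} __attribute__((packed));

/* Three flag bits pack into one 32-bit word; reserved adds the second. */
_Static_assert(sizeof(struct nvgpu_gpu_l2_fb_args_sketch) == 8,
	       "flag word plus reserved word should be 8 bytes");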
@@ -264,13 +272,14 @@ struct nvgpu_gpu_open_channel_args {
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 10, struct nvgpu_gpu_get_tpc_masks_args)
 #define NVGPU_GPU_IOCTL_OPEN_CHANNEL \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 11, struct nvgpu_gpu_open_channel_args)
+#define NVGPU_GPU_IOCTL_FLUSH_L2 \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 12, struct nvgpu_gpu_l2_fb_args)
+
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_OPEN_CHANNEL)
+	_IOC_NR(NVGPU_GPU_IOCTL_FLUSH_L2)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
 
 
 /*
  * /dev/nvhost-tsg-gpu device
  *
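Since NVGPU_GPU_IOCTL_LAST now resolves to the FLUSH_L2 command number, it is worth noting the companion invariant: if the handler's copy buffer is sized by NVGPU_GPU_IOCTL_MAX_ARG_SIZE, as the define's name suggests, every command's argument struct must fit within it. A hedged compile-time sketch (header path assumed):

#include <linux/nvgpu.h>	/* assumed uapi header location */

/* The new args struct must fit the ioctl dispatch buffer. */
_Static_assert(sizeof(struct nvgpu_gpu_l2_fb_args) <=
	       NVGPU_GPU_IOCTL_MAX_ARG_SIZE,
	       "nvgpu_gpu_l2_fb_args must fit the gpu ioctl arg buffer");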