diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index ffb525491..7cda9949c 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -377,6 +377,30 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
 
+/*
+ * Enable or disable PC sampling on the channel held by this debug session.
+ * Returns -EINVAL when the session has no channel or the chip does not
+ * provide an update_pc_sampling hook; otherwise returns the hook's result.
+ */
+static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s,
+			  struct nvgpu_dbg_gpu_pc_sampling_args *args)
+{
+	struct channel_gk20a *ch = dbg_s->ch;
+	struct gk20a *g;
+
+	gk20a_dbg_fn("");
+
+	/* ch->g is dereferenced below; reject a session with no channel. */
+	if (!ch)
+		return -EINVAL;
+
+	g = ch->g;
+	if (!g->ops.gr.update_pc_sampling)
+		return -EINVAL;
+
+	return g->ops.gr.update_pc_sampling(ch, args->enable);
+}
+
 long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -449,6 +473,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		       (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING:
+		err = gk20a_dbg_pc_sampling(dbg_s,
+		       (struct nvgpu_dbg_gpu_pc_sampling_args *)buf);
+		break;
+
 	default:
 		gk20a_err(dev_from_gk20a(g),
 			   "unrecognized dbg gpu ioctl cmd: 0x%x",
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index acee2e485..441ea7cc4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -165,6 +165,8 @@ struct gpu_ops {
 				void *ctx_ptr);
 		int (*dump_gr_regs)(struct gk20a *g,
 				struct gk20a_debug_output *o);
+		int (*update_pc_sampling)(struct channel_gk20a *ch,
+					   bool enable);
 	} gr;
 	const char *name;
 	struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 4b94f8633..deafc4388 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -500,4 +500,5 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 void gr_gk20a_free_gr_ctx(struct gk20a *g,
 			  struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx);
 int gr_gk20a_halt_pipe(struct gk20a *g);
+
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index da1c1ab0f..193406434 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -17,6 +17,7 @@
 #include /* for mdelay */
 #include
 #include
+#include <linux/vmalloc.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
@@ -946,6 +947,45 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
 		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
 		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
+
+	return 0;
+}
+
+/*
+ * Set or clear the PC sampling bit in the PM word of the channel's graphics
+ * context image.
+ *
+ * Returns -EINVAL if the channel has no gr_ctx or c->vpr is set, -ENOMEM if
+ * the context image cannot be mapped, and 0 on success.
+ */
+static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
+				       bool enable)
+{
+	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	void *ctx_ptr = NULL;
+	u32 v;
+
+	gk20a_dbg_fn("");
+
+	if (!ch_ctx->gr_ctx || c->vpr)
+		return -EINVAL;
+
+	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
+			0, pgprot_writecombine(PAGE_KERNEL));
+	if (!ctx_ptr)
+		return -ENOMEM;
+
+	/* Load and store must address the same word of the context image. */
+	v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+	v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
+	v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
+	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v);
+
+	vunmap(ctx_ptr);
+
+	gk20a_dbg_fn("done");
+
 	return 0;
 }
 
@@ -993,4 +1033,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.update_ctxsw_preemption_mode =
 		gr_gm20b_update_ctxsw_preemption_mode;
 	gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
+	gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h
index ec44e9fb7..cefd91e12 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h
@@ -110,6 +110,14 @@ static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
 {
 	return 0x0;
 }
+static inline u32 ctxsw_prog_main_image_pm_pc_sampling_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 ctxsw_prog_main_image_pm_pc_sampling_m(void)
+{
+	return 0x1 << 6;
+}
 static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
 {
 	return 0x0000002c;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 3c53ba94d..9e3a362f5 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -457,8 +457,20 @@ struct nvgpu_dbg_gpu_perfbuf_unmap_args {
 #define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args)
 
+/* Enable/disable PC Sampling */
+struct nvgpu_dbg_gpu_pc_sampling_args {
+	__u32 enable;
+	__u32 _pad0[1];
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_DISABLE 0
+#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_ENABLE 1
+
+#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING \
+	_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 9, struct nvgpu_dbg_gpu_pc_sampling_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_PC_SAMPLING)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)