mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 02:22:34 +03:00
gpu: nvgpu: Allow enabling PC sampling
Allow enabling of PC sampling hardware workaround. It is only applicable to gm20b. Bug 1517458 Bug 1573150 Change-Id: Iad6a3ae556489fb7ab9628637d291849d2cd98ea Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/710421
This commit is contained in:
committed by
Dan Willemsen
parent
1b6372ec6b
commit
325e0587d9
@@ -377,6 +377,17 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
|
||||
static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
|
||||
struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
|
||||
|
||||
static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s,
|
||||
struct nvgpu_dbg_gpu_pc_sampling_args *args)
|
||||
{
|
||||
struct channel_gk20a *ch = dbg_s->ch;
|
||||
struct gk20a *g = ch->g;
|
||||
|
||||
gk20a_dbg_fn("");
|
||||
|
||||
return g->ops.gr.update_pc_sampling ?
|
||||
g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL;
|
||||
}
|
||||
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
@@ -449,6 +460,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
|
||||
(struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
|
||||
break;
|
||||
|
||||
case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING:
|
||||
err = gk20a_dbg_pc_sampling(dbg_s,
|
||||
(struct nvgpu_dbg_gpu_pc_sampling_args *)buf);
|
||||
break;
|
||||
|
||||
default:
|
||||
gk20a_err(dev_from_gk20a(g),
|
||||
"unrecognized dbg gpu ioctl cmd: 0x%x",
|
||||
|
||||
@@ -165,6 +165,8 @@ struct gpu_ops {
|
||||
void *ctx_ptr);
|
||||
int (*dump_gr_regs)(struct gk20a *g,
|
||||
struct gk20a_debug_output *o);
|
||||
int (*update_pc_sampling)(struct channel_gk20a *ch,
|
||||
bool enable);
|
||||
} gr;
|
||||
const char *name;
|
||||
struct {
|
||||
|
||||
@@ -500,4 +500,5 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
|
||||
void gr_gk20a_free_gr_ctx(struct gk20a *g,
|
||||
struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx);
|
||||
int gr_gk20a_halt_pipe(struct gk20a *g);
|
||||
|
||||
#endif /*__GR_GK20A_H__*/
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <linux/delay.h> /* for mdelay */
|
||||
#include <linux/io.h>
|
||||
#include <linux/tegra-fuse.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include "gk20a/gk20a.h"
|
||||
#include "gk20a/gr_gk20a.h"
|
||||
@@ -946,6 +947,37 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
|
||||
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
|
||||
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
|
||||
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
|
||||
bool enable)
|
||||
{
|
||||
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
|
||||
void *ctx_ptr = NULL;
|
||||
u32 v;
|
||||
|
||||
gk20a_dbg_fn("");
|
||||
|
||||
if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr)
|
||||
return -EINVAL;
|
||||
|
||||
ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
|
||||
PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
|
||||
0, pgprot_writecombine(PAGE_KERNEL));
|
||||
if (!ctx_ptr)
|
||||
return -ENOMEM;
|
||||
|
||||
v = gk20a_mem_rd32(ctx_ptr, ctxsw_prog_main_image_pm_o());
|
||||
v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
|
||||
v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
|
||||
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v);
|
||||
|
||||
vunmap(ctx_ptr);
|
||||
|
||||
gk20a_dbg_fn("done");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -993,4 +1025,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
|
||||
gops->gr.update_ctxsw_preemption_mode =
|
||||
gr_gm20b_update_ctxsw_preemption_mode;
|
||||
gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
|
||||
gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
|
||||
}
|
||||
|
||||
@@ -110,6 +110,14 @@ static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
|
||||
{
|
||||
return 0x0;
|
||||
}
|
||||
/*
 * Field constructor: keeps only bit 0 of v and returns it shifted up to
 * bit position 6.
 */
static inline u32 ctxsw_prog_main_image_pm_pc_sampling_f(u32 v)
{
	const u32 low_bit = v & 0x1;

	return low_bit << 6;
}
|
||||
/*
 * Field mask: a single set bit at position 6, matching the position used
 * by ctxsw_prog_main_image_pm_pc_sampling_f().
 */
static inline u32 ctxsw_prog_main_image_pm_pc_sampling_m(void)
{
	const u32 mask = 0x1 << 6;

	return mask;
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
|
||||
{
|
||||
return 0x0000002c;
|
||||
|
||||
@@ -457,8 +457,20 @@ struct nvgpu_dbg_gpu_perfbuf_unmap_args {
|
||||
#define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \
|
||||
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args)
|
||||
|
||||
/* Enable/disable PC Sampling */
|
||||
/*
 * Argument block for NVGPU_DBG_GPU_IOCTL_PC_SAMPLING.
 */
struct nvgpu_dbg_gpu_pc_sampling_args {
|
||||
/*
 * Requested state: NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_DISABLE (0) or
 * NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_ENABLE (1). The ioctl handler passes
 * this value to a hook taking a bool, so any non-zero value enables.
 */
__u32 enable;
|
||||
/* NOTE(review): presumably reserved padding; not read by the handler. */
__u32 _pad0[1];
|
||||
};
|
||||
|
||||
#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_DISABLE 0
|
||||
#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_ENABLE 1
|
||||
|
||||
#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING \
|
||||
_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 9, struct nvgpu_dbg_gpu_pc_sampling_args)
|
||||
|
||||
#define NVGPU_DBG_GPU_IOCTL_LAST \
|
||||
_IOC_NR(NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP)
|
||||
_IOC_NR(NVGPU_DBG_GPU_IOCTL_PC_SAMPLING)
|
||||
#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
|
||||
sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user