gpu: nvgpu: Allow enabling PC sampling

Allow enabling the PC sampling hardware workaround. It is only
applicable to gm20b.

Bug 1517458
Bug 1573150

Change-Id: Iad6a3ae556489fb7ab9628637d291849d2cd98ea
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/710421
This commit is contained in:
Terje Bergstrom
2015-02-20 18:15:04 -08:00
committed by Dan Willemsen
parent 1b6372ec6b
commit 325e0587d9
6 changed files with 73 additions and 1 deletions

View File

@@ -377,6 +377,17 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
/*
 * Handle the PC sampling debugger ioctl: forward the requested enable state
 * to the chip-specific update_pc_sampling hook for the session's channel.
 *
 * dbg_s: debugger session; its channel (dbg_s->ch) is the one updated.
 * args:  ioctl arguments; only args->enable is read.
 *
 * Returns the hook's return value, or -EINVAL when the session has no
 * channel or the chip does not provide an update_pc_sampling hook.
 */
static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s,
			  struct nvgpu_dbg_gpu_pc_sampling_args *args)
{
	struct channel_gk20a *ch = dbg_s->ch;
	struct gk20a *g;

	gk20a_dbg_fn("");

	/* ch is dereferenced below; reject a session that has no channel. */
	if (!ch)
		return -EINVAL;

	g = ch->g;

	/* The hook is optional; chips that do not set it report -EINVAL. */
	if (!g->ops.gr.update_pc_sampling)
		return -EINVAL;

	return g->ops.gr.update_pc_sampling(ch, args->enable);
}
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg) unsigned long arg)
{ {
@@ -449,6 +460,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
(struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
break; break;
case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING:
err = gk20a_dbg_pc_sampling(dbg_s,
(struct nvgpu_dbg_gpu_pc_sampling_args *)buf);
break;
default: default:
gk20a_err(dev_from_gk20a(g), gk20a_err(dev_from_gk20a(g),
"unrecognized dbg gpu ioctl cmd: 0x%x", "unrecognized dbg gpu ioctl cmd: 0x%x",

View File

@@ -165,6 +165,8 @@ struct gpu_ops {
void *ctx_ptr); void *ctx_ptr);
int (*dump_gr_regs)(struct gk20a *g, int (*dump_gr_regs)(struct gk20a *g,
struct gk20a_debug_output *o); struct gk20a_debug_output *o);
int (*update_pc_sampling)(struct channel_gk20a *ch,
bool enable);
} gr; } gr;
const char *name; const char *name;
struct { struct {

View File

@@ -500,4 +500,5 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
void gr_gk20a_free_gr_ctx(struct gk20a *g, void gr_gk20a_free_gr_ctx(struct gk20a *g,
struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx);
int gr_gk20a_halt_pipe(struct gk20a *g); int gr_gk20a_halt_pipe(struct gk20a *g);
#endif /*__GR_GK20A_H__*/ #endif /*__GR_GK20A_H__*/

View File

@@ -17,6 +17,7 @@
#include <linux/delay.h> /* for mdelay */ #include <linux/delay.h> /* for mdelay */
#include <linux/io.h> #include <linux/io.h>
#include <linux/tegra-fuse.h> #include <linux/tegra-fuse.h>
#include <linux/vmalloc.h>
#include "gk20a/gk20a.h" #include "gk20a/gk20a.h"
#include "gk20a/gr_gk20a.h" #include "gk20a/gr_gk20a.h"
@@ -946,6 +947,37 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r())); gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n", gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r())); gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
return 0;
}
/*
 * Set or clear the PC sampling bit in the PM word of a channel's graphics
 * context image.
 *
 * c:      channel whose graphics context is patched.
 * enable: true sets the PC sampling bit, false clears it.
 *
 * The context pages are temporarily mapped into the kernel with vmap(),
 * the PM word is read, modified and written back, and the mapping is
 * released again.
 *
 * Returns 0 on success, -EINVAL if the channel has no graphics context or
 * has c->vpr set, and -ENOMEM if the context pages cannot be mapped.
 */
static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
				       bool enable)
{
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	void *ctx_ptr = NULL;
	void *pm_word;
	u32 v;

	gk20a_dbg_fn("");

	if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr)
		return -EINVAL;

	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
			0, pgprot_writecombine(PAGE_KERNEL));
	if (!ctx_ptr)
		return -ENOMEM;

	/*
	 * Address the PM word once and use it for both the read and the
	 * write (base + byte offset, index 0), so the read-modify-write
	 * operates on a single location.
	 */
	pm_word = ctx_ptr + ctxsw_prog_main_image_pm_o();

	v = gk20a_mem_rd32(pm_word, 0);
	v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
	v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
	gk20a_mem_wr32(pm_word, 0, v);

	vunmap(ctx_ptr);

	gk20a_dbg_fn("done");

	return 0;
}
@@ -993,4 +1025,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
gops->gr.update_ctxsw_preemption_mode = gops->gr.update_ctxsw_preemption_mode =
gr_gm20b_update_ctxsw_preemption_mode; gr_gm20b_update_ctxsw_preemption_mode;
gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs; gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
} }

View File

@@ -110,6 +110,14 @@ static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
{ {
return 0x0; return 0x0;
} }
/*
 * Field encoder for the pc_sampling bit: keeps only bit 0 of v and
 * returns it shifted up to bit position 6.
 */
static inline u32 ctxsw_prog_main_image_pm_pc_sampling_f(u32 v)
{
	const u32 field = v & 0x1;

	return field << 6;
}
/* Mask for the pc_sampling bit: a single bit at position 6. */
static inline u32 ctxsw_prog_main_image_pm_pc_sampling_m(void)
{
	const u32 mask = 0x1 << 6;

	return mask;
}
static inline u32 ctxsw_prog_main_image_pm_ptr_o(void) static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
{ {
return 0x0000002c; return 0x0000002c;

View File

@@ -457,8 +457,20 @@ struct nvgpu_dbg_gpu_perfbuf_unmap_args {
#define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \ #define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args) _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args)
/*
 * Enable/disable PC Sampling.
 *
 * Argument block passed with NVGPU_DBG_GPU_IOCTL_PC_SAMPLING.
 */
struct nvgpu_dbg_gpu_pc_sampling_args {
/* Requested state: one of the NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_* values below */
__u32 enable;
/* Padding word. NOTE(review): presumably reserved for alignment/future use - confirm */
__u32 _pad0[1];
};
/* Values for nvgpu_dbg_gpu_pc_sampling_args.enable */
#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_DISABLE 0
#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_ENABLE 1
/* Ioctl number 9 in the debugger ioctl space; _IOW: userspace passes the args to the kernel */
#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING \
_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 9, struct nvgpu_dbg_gpu_pc_sampling_args)
#define NVGPU_DBG_GPU_IOCTL_LAST \ #define NVGPU_DBG_GPU_IOCTL_LAST \
_IOC_NR(NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP) _IOC_NR(NVGPU_DBG_GPU_IOCTL_PC_SAMPLING)
#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \ #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args) sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)