mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 18:16:01 +03:00
gpu: nvgpu: Allow enabling PC sampling
Allow enabling of PC sampling hardware workaround. It is only applicable to gm20b. Bug 1517458 Bug 1573150 Change-Id: Iad6a3ae556489fb7ab9628637d291849d2cd98ea Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/710421
This commit is contained in:
committed by
Dan Willemsen
parent
1b6372ec6b
commit
325e0587d9
@@ -377,6 +377,17 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
|
|||||||
static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
|
static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
|
||||||
struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
|
struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
|
||||||
|
|
||||||
|
static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s,
|
||||||
|
struct nvgpu_dbg_gpu_pc_sampling_args *args)
|
||||||
|
{
|
||||||
|
struct channel_gk20a *ch = dbg_s->ch;
|
||||||
|
struct gk20a *g = ch->g;
|
||||||
|
|
||||||
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
return g->ops.gr.update_pc_sampling ?
|
||||||
|
g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL;
|
||||||
|
}
|
||||||
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
|
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
|
||||||
unsigned long arg)
|
unsigned long arg)
|
||||||
{
|
{
|
||||||
@@ -449,6 +460,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
|
|||||||
(struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
|
(struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING:
|
||||||
|
err = gk20a_dbg_pc_sampling(dbg_s,
|
||||||
|
(struct nvgpu_dbg_gpu_pc_sampling_args *)buf);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
gk20a_err(dev_from_gk20a(g),
|
gk20a_err(dev_from_gk20a(g),
|
||||||
"unrecognized dbg gpu ioctl cmd: 0x%x",
|
"unrecognized dbg gpu ioctl cmd: 0x%x",
|
||||||
|
|||||||
@@ -165,6 +165,8 @@ struct gpu_ops {
|
|||||||
void *ctx_ptr);
|
void *ctx_ptr);
|
||||||
int (*dump_gr_regs)(struct gk20a *g,
|
int (*dump_gr_regs)(struct gk20a *g,
|
||||||
struct gk20a_debug_output *o);
|
struct gk20a_debug_output *o);
|
||||||
|
int (*update_pc_sampling)(struct channel_gk20a *ch,
|
||||||
|
bool enable);
|
||||||
} gr;
|
} gr;
|
||||||
const char *name;
|
const char *name;
|
||||||
struct {
|
struct {
|
||||||
|
|||||||
@@ -500,4 +500,5 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
|
|||||||
void gr_gk20a_free_gr_ctx(struct gk20a *g,
|
void gr_gk20a_free_gr_ctx(struct gk20a *g,
|
||||||
struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx);
|
struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx);
|
||||||
int gr_gk20a_halt_pipe(struct gk20a *g);
|
int gr_gk20a_halt_pipe(struct gk20a *g);
|
||||||
|
|
||||||
#endif /*__GR_GK20A_H__*/
|
#endif /*__GR_GK20A_H__*/
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include <linux/delay.h> /* for mdelay */
|
#include <linux/delay.h> /* for mdelay */
|
||||||
#include <linux/io.h>
|
#include <linux/io.h>
|
||||||
#include <linux/tegra-fuse.h>
|
#include <linux/tegra-fuse.h>
|
||||||
|
#include <linux/vmalloc.h>
|
||||||
|
|
||||||
#include "gk20a/gk20a.h"
|
#include "gk20a/gk20a.h"
|
||||||
#include "gk20a/gr_gk20a.h"
|
#include "gk20a/gr_gk20a.h"
|
||||||
@@ -946,6 +947,37 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
|
|||||||
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
|
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
|
||||||
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
|
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
|
||||||
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
|
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
|
||||||
|
bool enable)
|
||||||
|
{
|
||||||
|
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
|
||||||
|
void *ctx_ptr = NULL;
|
||||||
|
u32 v;
|
||||||
|
|
||||||
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
|
||||||
|
PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
|
||||||
|
0, pgprot_writecombine(PAGE_KERNEL));
|
||||||
|
if (!ctx_ptr)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
v = gk20a_mem_rd32(ctx_ptr, ctxsw_prog_main_image_pm_o());
|
||||||
|
v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
|
||||||
|
v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
|
||||||
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v);
|
||||||
|
|
||||||
|
vunmap(ctx_ptr);
|
||||||
|
|
||||||
|
gk20a_dbg_fn("done");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -993,4 +1025,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
|
|||||||
gops->gr.update_ctxsw_preemption_mode =
|
gops->gr.update_ctxsw_preemption_mode =
|
||||||
gr_gm20b_update_ctxsw_preemption_mode;
|
gr_gm20b_update_ctxsw_preemption_mode;
|
||||||
gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
|
gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
|
||||||
|
gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -110,6 +110,14 @@ static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
|
|||||||
{
|
{
|
||||||
return 0x0;
|
return 0x0;
|
||||||
}
|
}
|
||||||
|
/*
 * Build the pc_sampling field of the context image PM word: keep only
 * bit 0 of @v and position it at bit 6.
 */
static inline u32 ctxsw_prog_main_image_pm_pc_sampling_f(u32 v)
{
	u32 field = v & 0x1;

	return field << 6;
}
|
||||||
|
/*
 * Mask covering the pc_sampling field of the context image PM word
 * (a single bit at position 6).
 */
static inline u32 ctxsw_prog_main_image_pm_pc_sampling_m(void)
{
	u32 mask = 0x1 << 6;

	return mask;
}
|
||||||
static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
|
static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
|
||||||
{
|
{
|
||||||
return 0x0000002c;
|
return 0x0000002c;
|
||||||
|
|||||||
@@ -457,8 +457,20 @@ struct nvgpu_dbg_gpu_perfbuf_unmap_args {
|
|||||||
#define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \
|
#define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \
|
||||||
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args)
|
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args)
|
||||||
|
|
||||||
|
/* Enable/disable PC Sampling */
|
||||||
|
/*
 * Argument block for NVGPU_DBG_GPU_IOCTL_PC_SAMPLING.
 * Two 32-bit words: the request itself and one padding word.
 */
struct nvgpu_dbg_gpu_pc_sampling_args {
|
||||||
|
/* NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_DISABLE (0) or _ENABLE (1) */
__u32 enable;
|
||||||
|
/* NOTE(review): padding word, presumably reserved; confirm it is expected to be zero */
__u32 _pad0[1];
|
||||||
|
};
|
||||||
|
|
||||||
|
#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_DISABLE 0
|
||||||
|
#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_ENABLE 1
|
||||||
|
|
||||||
|
#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING \
|
||||||
|
_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 9, struct nvgpu_dbg_gpu_pc_sampling_args)
|
||||||
|
|
||||||
#define NVGPU_DBG_GPU_IOCTL_LAST \
|
#define NVGPU_DBG_GPU_IOCTL_LAST \
|
||||||
_IOC_NR(NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP)
|
_IOC_NR(NVGPU_DBG_GPU_IOCTL_PC_SAMPLING)
|
||||||
#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
|
#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
|
||||||
sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)
|
sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user