diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index ffb525491..7cda9949c 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -377,6 +377,17 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
 
+/* PC_SAMPLING ioctl: -EINVAL if the session has no channel or the chip no op. */
+static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s,
+			  struct nvgpu_dbg_gpu_pc_sampling_args *args)
+{
+	struct channel_gk20a *ch = dbg_s->ch;
+
+	gk20a_dbg_fn("");
+	if (!ch || !ch->g->ops.gr.update_pc_sampling)
+		return -EINVAL;
+	return ch->g->ops.gr.update_pc_sampling(ch, args->enable);
+}
 long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			     unsigned long arg)
 {
@@ -449,6 +460,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		       (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING:
+		err = gk20a_dbg_pc_sampling(dbg_s,
+			   (struct nvgpu_dbg_gpu_pc_sampling_args *)buf);
+		break;
+
 	default:
 		gk20a_err(dev_from_gk20a(g),
 			   "unrecognized dbg gpu ioctl cmd: 0x%x",
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index acee2e485..441ea7cc4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -165,6 +165,8 @@ struct gpu_ops {
 				void *ctx_ptr);
 		int (*dump_gr_regs)(struct gk20a *g,
 				struct gk20a_debug_output *o);
+		int (*update_pc_sampling)(struct channel_gk20a *ch,
+					   bool enable);
 	} gr;
 	const char *name;
 	struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 4b94f8633..deafc4388 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -500,4 +500,5 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 void gr_gk20a_free_gr_ctx(struct gk20a *g,
 			  struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx);
 int gr_gk20a_halt_pipe(struct gk20a *g);
+
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index da1c1ab0f..193406434 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -17,6 +17,7 @@
 #include <linux/delay.h>	/* for mdelay */
 #include <linux/io.h>
 #include <linux/tegra-fuse.h>
+#include <linux/vmalloc.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
@@ -946,6 +947,37 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
 		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
 		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
+
+	return 0;
+}
+
+/* Set or clear the pc_sampling field in the pm word of c's mapped gr context. */
+static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
+				       bool enable)
+{
+	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	void *ctx_ptr;
+	u32 v;
+
+	gk20a_dbg_fn("");
+
+	if (!ch_ctx->gr_ctx || c->vpr)
+		return -EINVAL;
+
+	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
+			0, pgprot_writecombine(PAGE_KERNEL));
+	if (!ctx_ptr)
+		return -ENOMEM;
+
+	/* read-modify-write: fetch the same word that is written back below */
+	v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+	v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
+	v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
+	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v);
+
+	vunmap(ctx_ptr);
+	gk20a_dbg_fn("done");
+	return 0;
+}
 
@@ -993,4 +1025,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.update_ctxsw_preemption_mode =
 		gr_gm20b_update_ctxsw_preemption_mode;
 	gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
+	gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h
index ec44e9fb7..cefd91e12 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h
@@ -110,6 +110,14 @@ static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
 {
 	return 0x0;
 }
+static inline u32 ctxsw_prog_main_image_pm_pc_sampling_f(u32 v)
+{
+	return (v << 6) & 0x40;	/* bit 0 of v, placed at bit 6 */
+}
+static inline u32 ctxsw_prog_main_image_pm_pc_sampling_m(void)
+{
+	return 0x40;	/* single-bit mask: bit 6 */
+}
 static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
 {
 	return 0x0000002c;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 3c53ba94d..9e3a362f5 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -457,8 +457,20 @@ struct nvgpu_dbg_gpu_perfbuf_unmap_args {
 #define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args)
 
+/* Enable/disable PC Sampling: argument of NVGPU_DBG_GPU_IOCTL_PC_SAMPLING */
+struct nvgpu_dbg_gpu_pc_sampling_args {
+	__u32 enable;	/* presumably NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_{DISABLE,ENABLE} */
+	__u32 _pad0[1];	/* second 32-bit word; struct is 8 bytes in total */
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_DISABLE	0
+#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING_ENABLE	1
+
+#define NVGPU_DBG_GPU_IOCTL_PC_SAMPLING \
+	_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC,  9, struct nvgpu_dbg_gpu_pc_sampling_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST		\
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_PC_SAMPLING)
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE		\
 	sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)