gpu: nvgpu: add SET_CTX_MMU_DEBUG_MODE ioctl

Added the NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE ioctl to set the
MMU debug mode for a given context.

Added a gr.set_mmu_debug_mode HAL operation to change
NV_PGPC_PRI_MMU_DEBUG_CTRL for a given channel. The HAL implementation
for the native case is gm20b_gr_set_mmu_debug_mode. It uses regops
internally, which write directly to the register if the context is
resident, or patch the saved gr context otherwise.

Added the NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE flag to gate the
feature. NV_PGPC_PRI_MMU_DEBUG_CTRL must be context-switched by the
FECS ucode, so the feature is enabled only on TU104 for now.

Bug 2515097

Change-Id: Ib4efaf06fc47a8539b4474f94c68c20ce225263f
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2110720
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>

@@ -182,6 +182,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
         .get_offset_in_gpccs_segment =
             gr_gk20a_get_offset_in_gpccs_segment,
         .set_debug_mode = gm20b_gr_set_debug_mode,
+        .set_mmu_debug_mode = NULL,
         .reset = NULL,
         .halt_pipe = NULL,
         .disable_ctxsw = nvgpu_gr_disable_ctxsw,


@@ -43,4 +43,5 @@ void vgpu_gv11b_init_gpu_characteristics(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 }


@@ -215,6 +215,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
         .get_offset_in_gpccs_segment =
             gr_gk20a_get_offset_in_gpccs_segment,
         .set_debug_mode = gm20b_gr_set_debug_mode,
+        .set_mmu_debug_mode = NULL,
         .reset = NULL,
         .halt_pipe = NULL,
         .disable_ctxsw = nvgpu_gr_disable_ctxsw,


@@ -114,6 +114,7 @@ void vgpu_init_gpu_characteristics(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, false);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, false);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, false);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 }
 
 int vgpu_get_constants(struct gk20a *g)


@@ -31,6 +31,7 @@
 #include <nvgpu/utils.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
+#include <nvgpu/regops.h>
 #include <nvgpu/gr/ctx.h>
 #include <nvgpu/gr/config.h>
 #include <nvgpu/gr/gr.h>
@@ -673,6 +674,26 @@ void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
     gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, 0);
 }
 
+int gm20b_gr_set_mmu_debug_mode(struct gk20a *g,
+        struct nvgpu_channel *ch, bool enable)
+{
+    struct nvgpu_dbg_reg_op ctx_ops = {
+        .op = REGOP(WRITE_32),
+        .type = REGOP(TYPE_GR_CTX),
+        .offset = gr_gpcs_pri_mmu_debug_ctrl_r(),
+        .value_lo = enable ?
+            gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_f() :
+            gr_gpcs_pri_mmu_debug_ctrl_debug_disabled_f(),
+    };
+    int err;
+
+    err = gr_gk20a_exec_ctx_ops(ch, &ctx_ops, 1, 1, 0, NULL);
+    if (err != 0) {
+        nvgpu_err(g, "Failed to access register");
+    }
+
+    return err;
+}
+
 void gm20b_gr_set_debug_mode(struct gk20a *g, bool enable)
 {
     u32 reg_val, gpc_debug_ctrl;


@@ -64,6 +64,8 @@ void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
         u32 global_esr);
 u32 gr_gm20b_get_pmm_per_chiplet_offset(void);
 void gm20b_gr_set_debug_mode(struct gk20a *g, bool enable);
+int gm20b_gr_set_mmu_debug_mode(struct gk20a *g,
+        struct nvgpu_channel *ch, bool enable);
 bool gm20b_gr_esr_bpt_pending_events(u32 global_esr,
         enum nvgpu_event_id_type bpt_event);
 
 #endif /* NVGPU_GM20B_GR_GM20B_H */


@@ -159,6 +159,7 @@ static const struct gpu_ops gm20b_ops = {
         .get_lrf_tex_ltc_dram_override = NULL,
         .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
         .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
+        .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
         .record_sm_error_state = gm20b_gr_record_sm_error_state,
         .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
         .suspend_contexts = gr_gk20a_suspend_contexts,
@@ -1069,6 +1070,7 @@ int gm20b_init_hal(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, false);
     nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 
     g->name = "gm20b";


@@ -187,6 +187,7 @@ static const struct gpu_ops gp10b_ops = {
         .get_lrf_tex_ltc_dram_override = get_ecc_override_val,
         .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
         .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
+        .set_mmu_debug_mode = NULL,
         .record_sm_error_state = gm20b_gr_record_sm_error_state,
         .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
         .suspend_contexts = gr_gp10b_suspend_contexts,
@@ -1142,6 +1143,7 @@ int gp10b_init_hal(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
     nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 
     g->name = "gp10b";


@@ -305,6 +305,7 @@ static const struct gpu_ops gv100_ops = {
         .get_offset_in_gpccs_segment =
             gr_gk20a_get_offset_in_gpccs_segment,
         .set_debug_mode = gm20b_gr_set_debug_mode,
+        .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
         .log_mme_exception = NULL,
         .reset = nvgpu_gr_reset,
         .esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events,
@@ -1401,6 +1402,7 @@ int gv100_init_hal(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE, true);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 
     /*
      * gv100 bypasses the IOMMU since it uses the nvlink path memory.


@@ -275,6 +275,7 @@ static const struct gpu_ops gv11b_ops = {
         .get_offset_in_gpccs_segment =
             gr_gk20a_get_offset_in_gpccs_segment,
         .set_debug_mode = gm20b_gr_set_debug_mode,
+        .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
         .log_mme_exception = NULL,
         .get_ctxsw_checksum_mismatch_mailbox_val =
             gr_gv11b_ctxsw_checksum_mismatch_mailbox_val,
@@ -1294,6 +1295,7 @@ int gv11b_init_hal(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 
     /*
      * gv11b bypasses the IOMMU since it uses a special nvlink path to


@@ -285,6 +285,7 @@ static const struct gpu_ops tu104_ops = {
         .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
         .get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,
         .set_pmm_register = gr_gv100_set_pmm_register,
+        .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
         .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
         .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register,
         .record_sm_error_state = gv11b_gr_record_sm_error_state,
@@ -1452,6 +1453,7 @@ int tu104_init_hal(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE, true);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, true);
 
     /* for now */
     gops->clk.support_clk_freq_controller = false;


@@ -201,10 +201,12 @@ struct gk20a;
 /* Reduced profile of nvgpu driver */
 #define NVGPU_DRIVER_REDUCED_PROFILE 77
+/* NVGPU_GPU_IOCTL_SET_MMU_DEBUG_MODE is available */
+#define NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE 78
 
 /*
  * Must be greater than the largest bit offset in the above list.
  */
-#define NVGPU_MAX_ENABLED_BITS 78U
+#define NVGPU_MAX_ENABLED_BITS 79U
 
 /**
  * nvgpu_is_enabled - Check if the passed flag is enabled.


@@ -401,6 +401,8 @@ struct gpu_ops {
             u32 num_ppcs, u32 reg_list_ppc_count,
             u32 *__offset_in_segment);
         void (*set_debug_mode)(struct gk20a *g, bool enable);
+        int (*set_mmu_debug_mode)(struct gk20a *g,
+                struct nvgpu_channel *ch, bool enable);
         void (*log_mme_exception)(struct gk20a *g);
         int (*reset)(struct gk20a *g);
         bool (*esr_bpt_pending_events)(u32 global_esr,


@@ -239,6 +239,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
         NVGPU_SUPPORT_VPR},
     {NVGPU_GPU_FLAGS_DRIVER_REDUCED_PROFILE,
         NVGPU_DRIVER_REDUCED_PROFILE},
+    {NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE,
+        NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE},
 };
 
 static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
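
The mapping above exposes the feature to userspace through the GPU
characteristics flags. A minimal sketch of the probe (assuming an open
gpu ctrl node fd and the long-standing GET_CHARACTERISTICS query; the
helper name is illustrative):

    static int ctx_mmu_debug_mode_supported(int ctrl_fd)
    {
        struct nvgpu_gpu_characteristics chars;
        struct nvgpu_gpu_get_characteristics args;

        memset(&chars, 0, sizeof(chars));
        memset(&args, 0, sizeof(args));
        args.gpu_characteristics_buf_size = sizeof(chars);
        args.gpu_characteristics_buf_addr = (__u64)(uintptr_t)&chars;

        if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &args) != 0)
            return 0;

        /* flag is set only when the chip enables the support bit */
        return (chars.flags &
                NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE) != 0;
    }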


@@ -124,6 +124,10 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
         struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args);
 
+static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(
+        struct dbg_session_gk20a *dbg_s,
+        struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args);
+
 static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
         struct dbg_session_gk20a *dbg_s,
         struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
@@ -1079,6 +1083,51 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
     return err;
 }
 
+static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(
+        struct dbg_session_gk20a *dbg_s,
+        struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args)
+{
+    int err;
+    struct gk20a *g = dbg_s->g;
+    struct nvgpu_channel *ch;
+    bool enable = (args->mode == NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_ENABLED);
+
+    nvgpu_log_fn(g, "mode=%u", args->mode);
+
+    if (args->reserved != 0U) {
+        return -EINVAL;
+    }
+
+    if (g->ops.gr.set_mmu_debug_mode == NULL) {
+        return -ENOSYS;
+    }
+
+    err = gk20a_busy(g);
+    if (err) {
+        nvgpu_err(g, "failed to poweron");
+        return err;
+    }
+
+    /* Take the global lock, since we'll be doing global regops */
+    nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+    ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+    if (!ch) {
+        nvgpu_err(g, "no bound channel for mmu debug mode");
+        goto clean_up;
+    }
+
+    err = g->ops.gr.set_mmu_debug_mode(g, ch, enable);
+    if (err) {
+        nvgpu_err(g, "set mmu debug mode failed, err=%d", err);
+    }
+
+clean_up:
+    nvgpu_mutex_release(&g->dbg_sessions_lock);
+    gk20a_idle(g);
+    return err;
+}
+
 static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
         struct dbg_session_gk20a *dbg_s,
         struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args)
@@ -2110,6 +2159,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
             (struct nvgpu_dbg_gpu_cycle_stats_snapshot_args *)buf);
         break;
 #endif
 
+    case NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE:
+        err = nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(dbg_s,
+            (struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *)buf);
+        break;
+
     default:
         nvgpu_err(g,


@@ -168,6 +168,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_USERMODE_SUBMIT (1ULL << 30)
 /* Reduced profile is enabled */
 #define NVGPU_GPU_FLAGS_DRIVER_REDUCED_PROFILE (1ULL << 31)
+/* Set MMU debug mode is available */
+#define NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE (1ULL << 32)
 /* SM LRF ECC is enabled */
 #define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60)
 /* SM SHM ECC is enabled */
@@ -1448,8 +1450,20 @@ struct nvgpu_dbg_gpu_cycle_stats_snapshot_args {
 #define NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT \
     _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 25, struct nvgpu_dbg_gpu_cycle_stats_snapshot_args)
 
+/* MMU Debug Mode */
+#define NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_DISABLED 0
+#define NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_ENABLED 1
+
+struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args {
+    __u32 mode;
+    __u32 reserved;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE \
+    _IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 26, \
+        struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST \
-    _IOC_NR(NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT)
+    _IOC_NR(NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
     sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)
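
Note: NVGPU_DBG_GPU_IOCTL_LAST now resolves to the new ioctl number,
while NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE is unchanged, since the new
eight-byte args struct is smaller than
nvgpu_dbg_gpu_access_fb_memory_args, which remains the largest argument.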