diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index db4b66d48..cca7eff5f 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -182,6 +182,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { .get_offset_in_gpccs_segment = gr_gk20a_get_offset_in_gpccs_segment, .set_debug_mode = gm20b_gr_set_debug_mode, + .set_mmu_debug_mode = NULL, .reset = NULL, .halt_pipe = NULL, .disable_ctxsw = nvgpu_gr_disable_ctxsw, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_gv11b.c index ea0f392da..375e2f045 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_gv11b.c @@ -43,4 +43,5 @@ void vgpu_gv11b_init_gpu_characteristics(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false); } diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index d1067ddf1..fd9eaa703 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -215,6 +215,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .get_offset_in_gpccs_segment = gr_gk20a_get_offset_in_gpccs_segment, .set_debug_mode = gm20b_gr_set_debug_mode, + .set_mmu_debug_mode = NULL, .reset = NULL, .halt_pipe = NULL, .disable_ctxsw = nvgpu_gr_disable_ctxsw, diff --git a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c index 701197cd6..e2175cd5d 100644 --- a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c @@ -114,6 +114,7 @@ void 
vgpu_init_gpu_characteristics(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, false); nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, false); nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, false); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false); } int vgpu_get_constants(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c index 4da1c8b43..0abdd52af 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -673,6 +674,26 @@ void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, 0); } +int gm20b_gr_set_mmu_debug_mode(struct gk20a *g, + struct nvgpu_channel *ch, bool enable) +{ + struct nvgpu_dbg_reg_op ctx_ops = { + .op = REGOP(WRITE_32), + .type = REGOP(TYPE_GR_CTX), + .offset = gr_gpcs_pri_mmu_debug_ctrl_r(), + .value_lo = enable ? 
+ gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_f() : + gr_gpcs_pri_mmu_debug_ctrl_debug_disabled_f(), + }; + int err; + + err = gr_gk20a_exec_ctx_ops(ch, &ctx_ops, 1, 1, 0, NULL); + if (err != 0) { + nvgpu_err(g, "Failed to access register"); + } + return err; +} + void gm20b_gr_set_debug_mode(struct gk20a *g, bool enable) { u32 reg_val, gpc_debug_ctrl; diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.h b/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.h index bb4d13d03..63355f706 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.h @@ -64,6 +64,8 @@ void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, u32 global_esr); u32 gr_gm20b_get_pmm_per_chiplet_offset(void); void gm20b_gr_set_debug_mode(struct gk20a *g, bool enable); +int gm20b_gr_set_mmu_debug_mode(struct gk20a *g, + struct nvgpu_channel *ch, bool enable); bool gm20b_gr_esr_bpt_pending_events(u32 global_esr, enum nvgpu_event_id_type bpt_event); #endif /* NVGPU_GM20B_GR_GM20B_H */ diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 1d68f0a67..dbf898cc7 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -159,6 +159,7 @@ static const struct gpu_ops gm20b_ops = { .get_lrf_tex_ltc_dram_override = NULL, .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, + .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode, .record_sm_error_state = gm20b_gr_record_sm_error_state, .clear_sm_error_state = gm20b_gr_clear_sm_error_state, .suspend_contexts = gr_gk20a_suspend_contexts, @@ -1069,6 +1070,7 @@ int gm20b_init_hal(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false); nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, false); nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false); g->name = "gm20b"; diff --git 
a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index 7756d500c..c3970a79e 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -187,6 +187,7 @@ static const struct gpu_ops gp10b_ops = { .get_lrf_tex_ltc_dram_override = get_ecc_override_val, .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, + .set_mmu_debug_mode = NULL, .record_sm_error_state = gm20b_gr_record_sm_error_state, .clear_sm_error_state = gm20b_gr_clear_sm_error_state, .suspend_contexts = gr_gp10b_suspend_contexts, @@ -1142,6 +1143,7 @@ int gp10b_init_hal(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false); nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true); nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false); g->name = "gp10b"; diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv100.c b/drivers/gpu/nvgpu/hal/init/hal_gv100.c index 4eacb1a77..1319ce6d4 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv100.c @@ -305,6 +305,7 @@ static const struct gpu_ops gv100_ops = { .get_offset_in_gpccs_segment = gr_gk20a_get_offset_in_gpccs_segment, .set_debug_mode = gm20b_gr_set_debug_mode, + .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode, .log_mme_exception = NULL, .reset = nvgpu_gr_reset, .esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events, @@ -1401,6 +1402,7 @@ int gv100_init_hal(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false); /* * gv100 bypasses the IOMMU since it uses the nvlink path memory. 
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index c8a8cf08b..357e74f5b 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -275,6 +275,7 @@ static const struct gpu_ops gv11b_ops = { .get_offset_in_gpccs_segment = gr_gk20a_get_offset_in_gpccs_segment, .set_debug_mode = gm20b_gr_set_debug_mode, + .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode, .log_mme_exception = NULL, .get_ctxsw_checksum_mismatch_mailbox_val = gr_gv11b_ctxsw_checksum_mismatch_mailbox_val, @@ -1294,6 +1295,7 @@ int gv11b_init_hal(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false); /* * gv11b bypasses the IOMMU since it uses a special nvlink path to diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 388bbdb92..d92ecfd9b 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -285,6 +285,7 @@ static const struct gpu_ops tu104_ops = { .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, .get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon, .set_pmm_register = gr_gv100_set_pmm_register, + .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode, .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register, .record_sm_error_state = gv11b_gr_record_sm_error_state, @@ -1452,6 +1453,7 @@ int tu104_init_hal(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, true); /* for now */ gops->clk.support_clk_freq_controller = false; diff --git 
a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index c0e34ee61..4f2284ee6 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -201,10 +201,12 @@ struct gk20a; /* Reduced profile of nvgpu driver */ #define NVGPU_DRIVER_REDUCED_PROFILE 77 +/* NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE is available */ +#define NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE 78 /* * Must be greater than the largest bit offset in the above list. */ -#define NVGPU_MAX_ENABLED_BITS 78U +#define NVGPU_MAX_ENABLED_BITS 79U /** * nvgpu_is_enabled - Check if the passed flag is enabled. diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index b02756a63..d0b5c4517 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -401,6 +401,8 @@ struct gpu_ops { u32 num_ppcs, u32 reg_list_ppc_count, u32 *__offset_in_segment); void (*set_debug_mode)(struct gk20a *g, bool enable); + int (*set_mmu_debug_mode)(struct gk20a *g, + struct nvgpu_channel *ch, bool enable); void (*log_mme_exception)(struct gk20a *g); int (*reset)(struct gk20a *g); bool (*esr_bpt_pending_events)(u32 global_esr, diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 58fddfe87..8f357f3f9 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -239,6 +239,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = { NVGPU_SUPPORT_VPR}, {NVGPU_GPU_FLAGS_DRIVER_REDUCED_PROFILE, NVGPU_DRIVER_REDUCED_PROFILE}, + {NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE, + NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE}, }; static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index d8543b1c4..f0857fd36 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ 
-124,6 +124,10 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args); +static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args); + static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); @@ -1079,6 +1083,51 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, return err; } +static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args) +{ + int err; + struct gk20a *g = dbg_s->g; + struct nvgpu_channel *ch; + bool enable = (args->mode == NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_ENABLED); + + nvgpu_log_fn(g, "mode=%u", args->mode); + + if (args->reserved != 0U) { + return -EINVAL; + } + + if (g->ops.gr.set_mmu_debug_mode == NULL) { + return -ENOSYS; + } + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + /* Take the global lock, since we'll be doing global regops */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) { + nvgpu_err(g, "no bound channel for mmu debug mode"); + goto clean_up; + } + + err = g->ops.gr.set_mmu_debug_mode(g, ch, enable); + if (err) { + nvgpu_err(g, "set mmu debug mode failed, err=%d", err); + } + +clean_up: + nvgpu_mutex_release(&g->dbg_sessions_lock); + gk20a_idle(g); + return err; +} + static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) @@ -2110,6 +2159,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, (struct nvgpu_dbg_gpu_cycle_stats_snapshot_args *)buf); break; #endif + case 
NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE:
+		err = nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(dbg_s,
+		   (struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *)buf);
+		break;
+
 	default:
 		nvgpu_err(g,
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 45e8ae79e..f5f4c6280 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -168,6 +168,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_USERMODE_SUBMIT (1ULL << 30)
 /* Reduced profile is enabled */
 #define NVGPU_GPU_FLAGS_DRIVER_REDUCED_PROFILE (1ULL << 31)
+/* Set MMU debug mode is available */
+#define NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE (1ULL << 32)
 /* SM LRF ECC is enabled */
 #define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60)
 /* SM SHM ECC is enabled */
@@ -1448,8 +1450,20 @@ struct nvgpu_dbg_gpu_cycle_stats_snapshot_args {
 #define NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 25, struct nvgpu_dbg_gpu_cycle_stats_snapshot_args)
 
+/* MMU debug mode: values for nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args.mode */
+#define NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_DISABLED 0
+#define NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_ENABLED 1
+
+struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args {
+	__u32 mode; /* ..._MODE_ENABLED enables; any other value disables */
+	__u32 reserved; /* must be 0, the ioctl returns -EINVAL otherwise */
+};
+#define NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE \
+	_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 26, \
+	struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)