gpu: nvgpu: add SET_CTX_MMU_DEBUG_MODE ioctl

Added the NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE ioctl to set the
MMU debug mode for a given context.

Added a gr.set_mmu_debug_mode HAL operation to change
NV_PGPC_PRI_MMU_DEBUG_CTRL for a given channel. The HAL implementation
for the native case is gm20b_gr_set_mmu_debug_mode. It uses regops
internally, which write directly to the register if the context is
resident, or patch the saved gr context otherwise.

Added the NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE flag to gate the
feature. NV_PGPC_PRI_MMU_DEBUG_CTRL must be context-switched by the
FECS ucode, so the feature is enabled only on TU104 for now.

Bug 2515097

Change-Id: Ib4efaf06fc47a8539b4474f94c68c20ce225263f
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2110720
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>

@@ -182,6 +182,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
         .get_offset_in_gpccs_segment =
             gr_gk20a_get_offset_in_gpccs_segment,
         .set_debug_mode = gm20b_gr_set_debug_mode,
+        .set_mmu_debug_mode = NULL,
         .reset = NULL,
         .halt_pipe = NULL,
         .disable_ctxsw = nvgpu_gr_disable_ctxsw,


@@ -43,4 +43,5 @@ void vgpu_gv11b_init_gpu_characteristics(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 }


@@ -215,6 +215,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
         .get_offset_in_gpccs_segment =
             gr_gk20a_get_offset_in_gpccs_segment,
         .set_debug_mode = gm20b_gr_set_debug_mode,
+        .set_mmu_debug_mode = NULL,
         .reset = NULL,
         .halt_pipe = NULL,
         .disable_ctxsw = nvgpu_gr_disable_ctxsw,


@@ -114,6 +114,7 @@ void vgpu_init_gpu_characteristics(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, false);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, false);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, false);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 }
 
 int vgpu_get_constants(struct gk20a *g)


@@ -31,6 +31,7 @@
 #include <nvgpu/utils.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
+#include <nvgpu/regops.h>
 #include <nvgpu/gr/ctx.h>
 #include <nvgpu/gr/config.h>
 #include <nvgpu/gr/gr.h>
@@ -673,6 +674,26 @@ void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
     gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, 0);
 }
 
+int gm20b_gr_set_mmu_debug_mode(struct gk20a *g,
+        struct nvgpu_channel *ch, bool enable)
+{
+    struct nvgpu_dbg_reg_op ctx_ops = {
+        .op = REGOP(WRITE_32),
+        .type = REGOP(TYPE_GR_CTX),
+        .offset = gr_gpcs_pri_mmu_debug_ctrl_r(),
+        .value_lo = enable ?
+            gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_f() :
+            gr_gpcs_pri_mmu_debug_ctrl_debug_disabled_f(),
+    };
+    int err;
+
+    err = gr_gk20a_exec_ctx_ops(ch, &ctx_ops, 1, 1, 0, NULL);
+    if (err != 0) {
+        nvgpu_err(g, "Failed to access register");
+    }
+
+    return err;
+}
+
 void gm20b_gr_set_debug_mode(struct gk20a *g, bool enable)
 {
     u32 reg_val, gpc_debug_ctrl;


@@ -64,6 +64,8 @@ void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
         u32 global_esr);
 u32 gr_gm20b_get_pmm_per_chiplet_offset(void);
 void gm20b_gr_set_debug_mode(struct gk20a *g, bool enable);
+int gm20b_gr_set_mmu_debug_mode(struct gk20a *g,
+        struct nvgpu_channel *ch, bool enable);
 bool gm20b_gr_esr_bpt_pending_events(u32 global_esr,
         enum nvgpu_event_id_type bpt_event);
 
 #endif /* NVGPU_GM20B_GR_GM20B_H */


@@ -159,6 +159,7 @@ static const struct gpu_ops gm20b_ops = {
         .get_lrf_tex_ltc_dram_override = NULL,
         .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
         .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
+        .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
         .record_sm_error_state = gm20b_gr_record_sm_error_state,
         .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
         .suspend_contexts = gr_gk20a_suspend_contexts,
@@ -1069,6 +1070,7 @@ int gm20b_init_hal(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, false);
     nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 
     g->name = "gm20b";


@@ -187,6 +187,7 @@ static const struct gpu_ops gp10b_ops = {
         .get_lrf_tex_ltc_dram_override = get_ecc_override_val,
         .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
         .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
+        .set_mmu_debug_mode = NULL,
         .record_sm_error_state = gm20b_gr_record_sm_error_state,
         .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
         .suspend_contexts = gr_gp10b_suspend_contexts,
@@ -1142,6 +1143,7 @@ int gp10b_init_hal(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
     nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 
     g->name = "gp10b";


@@ -305,6 +305,7 @@ static const struct gpu_ops gv100_ops = {
         .get_offset_in_gpccs_segment =
             gr_gk20a_get_offset_in_gpccs_segment,
         .set_debug_mode = gm20b_gr_set_debug_mode,
+        .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
         .log_mme_exception = NULL,
         .reset = nvgpu_gr_reset,
         .esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events,
@@ -1401,6 +1402,7 @@ int gv100_init_hal(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE, true);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 
     /*
      * gv100 bypasses the IOMMU since it uses the nvlink path memory.


@@ -275,6 +275,7 @@ static const struct gpu_ops gv11b_ops = {
         .get_offset_in_gpccs_segment =
             gr_gk20a_get_offset_in_gpccs_segment,
         .set_debug_mode = gm20b_gr_set_debug_mode,
+        .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
         .log_mme_exception = NULL,
         .get_ctxsw_checksum_mismatch_mailbox_val =
             gr_gv11b_ctxsw_checksum_mismatch_mailbox_val,
@@ -1294,6 +1295,7 @@ int gv11b_init_hal(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 
     /*
      * gv11b bypasses the IOMMU since it uses a special nvlink path to


@@ -285,6 +285,7 @@ static const struct gpu_ops tu104_ops = {
         .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
         .get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,
         .set_pmm_register = gr_gv100_set_pmm_register,
+        .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
         .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
         .init_hwpm_pmm_register = gr_gv100_init_hwpm_pmm_register,
         .record_sm_error_state = gv11b_gr_record_sm_error_state,
@@ -1452,6 +1453,7 @@ int tu104_init_hal(struct gk20a *g)
     nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true);
     nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE, true);
+    nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, true);
 
     /* for now */
     gops->clk.support_clk_freq_controller = false;


@@ -201,10 +201,12 @@ struct gk20a;
 /* Reduced profile of nvgpu driver */
 #define NVGPU_DRIVER_REDUCED_PROFILE 77
+/* NVGPU_GPU_IOCTL_SET_MMU_DEBUG_MODE is available */
+#define NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE 78
 
 /*
  * Must be greater than the largest bit offset in the above list.
  */
-#define NVGPU_MAX_ENABLED_BITS 78U
+#define NVGPU_MAX_ENABLED_BITS 79U
 
 /**
  * nvgpu_is_enabled - Check if the passed flag is enabled.


@@ -401,6 +401,8 @@ struct gpu_ops {
             u32 num_ppcs, u32 reg_list_ppc_count,
             u32 *__offset_in_segment);
         void (*set_debug_mode)(struct gk20a *g, bool enable);
+        int (*set_mmu_debug_mode)(struct gk20a *g,
+                struct nvgpu_channel *ch, bool enable);
         void (*log_mme_exception)(struct gk20a *g);
         int (*reset)(struct gk20a *g);
         bool (*esr_bpt_pending_events)(u32 global_esr,


@@ -239,6 +239,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
         NVGPU_SUPPORT_VPR},
     {NVGPU_GPU_FLAGS_DRIVER_REDUCED_PROFILE,
         NVGPU_DRIVER_REDUCED_PROFILE},
+    {NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE,
+        NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE},
 };
 
 static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
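
The mapping above exposes the feature to userspace through the GPU
characteristics flags. A minimal sketch of the probe (assuming an open
gpu ctrl node fd and the long-standing GET_CHARACTERISTICS query; the
helper name is illustrative):

    static int ctx_mmu_debug_mode_supported(int ctrl_fd)
    {
        struct nvgpu_gpu_characteristics chars;
        struct nvgpu_gpu_get_characteristics args;

        memset(&chars, 0, sizeof(chars));
        memset(&args, 0, sizeof(args));
        args.gpu_characteristics_buf_size = sizeof(chars);
        args.gpu_characteristics_buf_addr = (__u64)(uintptr_t)&chars;

        if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &args) != 0)
            return 0;

        /* flag is set only when the chip enables the support bit */
        return (chars.flags &
                NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE) != 0;
    }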


@@ -124,6 +124,10 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
         struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args);
 
+static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(
+        struct dbg_session_gk20a *dbg_s,
+        struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args);
+
 static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
         struct dbg_session_gk20a *dbg_s,
         struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
@@ -1079,6 +1083,51 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
     return err;
 }
 
+static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(
+        struct dbg_session_gk20a *dbg_s,
+        struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *args)
+{
+    int err;
+    struct gk20a *g = dbg_s->g;
+    struct nvgpu_channel *ch;
+    bool enable = (args->mode == NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_ENABLED);
+
+    nvgpu_log_fn(g, "mode=%u", args->mode);
+
+    if (args->reserved != 0U) {
+        return -EINVAL;
+    }
+
+    if (g->ops.gr.set_mmu_debug_mode == NULL) {
+        return -ENOSYS;
+    }
+
+    err = gk20a_busy(g);
+    if (err) {
+        nvgpu_err(g, "failed to poweron");
+        return err;
+    }
+
+    /* Take the global lock, since we'll be doing global regops */
+    nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+    ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+    if (!ch) {
+        nvgpu_err(g, "no bound channel for mmu debug mode");
+        goto clean_up;
+    }
+
+    err = g->ops.gr.set_mmu_debug_mode(g, ch, enable);
+    if (err) {
+        nvgpu_err(g, "set mmu debug mode failed, err=%d", err);
+    }
+
+clean_up:
+    nvgpu_mutex_release(&g->dbg_sessions_lock);
+    gk20a_idle(g);
+    return err;
+}
+
 static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
         struct dbg_session_gk20a *dbg_s,
         struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args)
@@ -2110,6 +2159,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
             (struct nvgpu_dbg_gpu_cycle_stats_snapshot_args *)buf);
         break;
 #endif
 
+    case NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE:
+        err = nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(dbg_s,
+            (struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *)buf);
+        break;
+
     default:
         nvgpu_err(g,


@@ -168,6 +168,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_USERMODE_SUBMIT (1ULL << 30)
 /* Reduced profile is enabled */
 #define NVGPU_GPU_FLAGS_DRIVER_REDUCED_PROFILE (1ULL << 31)
+/* Set MMU debug mode is available */
+#define NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE (1ULL << 32)
 /* SM LRF ECC is enabled */
 #define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60)
 /* SM SHM ECC is enabled */
@@ -1448,8 +1450,20 @@ struct nvgpu_dbg_gpu_cycle_stats_snapshot_args {
 #define NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT \
     _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 25, struct nvgpu_dbg_gpu_cycle_stats_snapshot_args)
 
+/* MMU Debug Mode */
+#define NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_DISABLED 0
+#define NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_ENABLED 1
+
+struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args {
+    __u32 mode;
+    __u32 reserved;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE \
+    _IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 26, \
+        struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST \
-    _IOC_NR(NVGPU_DBG_GPU_IOCTL_CYCLE_STATS_SNAPSHOT)
+    _IOC_NR(NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
     sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)
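
Note: NVGPU_DBG_GPU_IOCTL_LAST now resolves to the new ioctl number,
while NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE is unchanged, since the new
eight-byte args struct is smaller than
nvgpu_dbg_gpu_access_fb_memory_args, which remains the largest argument.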