gpu: nvgpu: add ioctl to configure implicit ERRBAR

Add ioctl support to configure implicit ERRBAR by setting/clearing the
EXIT_WAIT_FOR_ERRBAR field of the
NV_PGRAPH_PRI_GPCS_TPCS_SM_SCH_MACRO_SCHED register.

Add GPU characteristics flag NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED
to allow the userspace driver to determine whether the implicit ERRBAR
ioctl is supported.

Bug: 200782861

Change-Id: I530a4cf73bc5c844e8d73094d3e23949568fe335
Signed-off-by: atanand <atanand@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2718672
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
atanand
2022-05-26 09:34:15 +00:00
committed by mobile promotions
parent faf18009cb
commit eae4593343
15 changed files with 181 additions and 2 deletions

View File

@@ -1112,6 +1112,28 @@ const u32 *ga10b_gr_get_hwpm_cau_init_data(u32 *count)
return hwpm_cau_init_data;
}
int ga10b_gr_set_sched_wait_for_errbar(struct gk20a *g,
struct nvgpu_channel *ch, bool enable)
{
struct nvgpu_dbg_reg_op ctx_ops = {
.op = REGOP(WRITE_32),
.type = REGOP(TYPE_GR_CTX),
.offset = gr_gpcs_pri_tpcs_sm_sch_macro_sched_r(),
.value_lo = enable ?
gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_enabled_f() :
gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_disabled_f(),
};
int err;
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
err = gr_gk20a_exec_ctx_ops(tsg, &ctx_ops, 1, 1, 0, &flags);
if (err != 0) {
nvgpu_err(g, "update implicit ERRBAR failed");
}
return err;
}
#endif /* CONFIG_NVGPU_DEBUGGER */
#ifdef CONFIG_NVGPU_HAL_NON_FUSA

View File

@@ -67,5 +67,7 @@ int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr,
u32 context_buffer_size,
u32 *priv_offset);
const u32 *ga10b_gr_get_hwpm_cau_init_data(u32 *count);
/*
 * Configure the implicit ERRBAR for the given channel: @enable == true
 * turns it on, false turns it off.
 * Returns 0 on success and a non-zero error code on failure.
 * Only declared when CONFIG_NVGPU_DEBUGGER is defined.
 */
int ga10b_gr_set_sched_wait_for_errbar(struct gk20a *g,
struct nvgpu_channel *ch, bool enable);
#endif /* CONFIG_NVGPU_DEBUGGER */
#endif /* NVGPU_GR_GA10B_H */

View File

@@ -859,6 +859,7 @@ static const struct gops_gr ga100_ops_gr = {
gr_ga100_process_context_buffer_priv_segment,
.set_debug_mode = gm20b_gr_set_debug_mode,
.set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
.set_sched_wait_for_errbar = ga10b_gr_set_sched_wait_for_errbar,
.esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events,
.get_ctx_buffer_offsets = gr_gk20a_get_ctx_buffer_offsets,
.get_pm_ctx_buffer_offsets = gr_gk20a_get_pm_ctx_buffer_offsets,
@@ -1903,6 +1904,7 @@ int ga100_init_hal(struct gk20a *g)
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
nvgpu_set_enabled(g, NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED, true);
nvgpu_set_enabled(g, NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, true);
#endif
/*
* Tu104 has multiple async-LCE (3), GRCE (2) and PCE (4).

View File

@@ -867,6 +867,7 @@ static const struct gops_gr ga10b_ops_gr = {
gr_ga10b_process_context_buffer_priv_segment,
.set_debug_mode = gm20b_gr_set_debug_mode,
.set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
.set_sched_wait_for_errbar = ga10b_gr_set_sched_wait_for_errbar,
.esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events,
.get_ctx_buffer_offsets = gr_gk20a_get_ctx_buffer_offsets,
.get_pm_ctx_buffer_offsets = gr_gk20a_get_pm_ctx_buffer_offsets,
@@ -1932,6 +1933,7 @@ int ga10b_init_hal(struct gk20a *g)
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
nvgpu_set_enabled(g, NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED, true);
nvgpu_set_enabled(g, NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, true);
#endif
if (g->ops.pmu.is_pmu_supported(g)) {