gpu: nvgpu: add ioctl to configure implicit ERRBAR

Add ioctl support to configure implicit ERRBAR by setting/clearing the
EXIT_WAIT_FOR_ERRBAR field in the NV_PGRAPH_PRI_GPCS_TPCS_SM_SCH_MACRO_SCHED
register.

Add the GPU characteristics flag NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED
so that the userspace driver can determine whether the implicit ERRBAR ioctl
is supported.

Bug: 200782861

Change-Id: I530a4cf73bc5c844e8d73094d3e23949568fe335
Signed-off-by: atanand <atanand@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2718672
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
GVS: Gerrit_Virtual_Submit
Author: atanand
Date: 2022-05-26 09:34:15 +00:00
Committed by: mobile promotions
Parent: faf18009cb
Commit: eae4593343
15 changed files with 181 additions and 2 deletions
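
A hedged sketch (not part of this change) of the userspace side of the support
check: the new characteristics flag lets the userspace driver decide whether
the implicit ERRBAR ioctl can be used. The flag value below comes from this
change; how the characteristics flags word is read from the control node is
assumed and not shown.

#include <stdbool.h>
#include <stdint.h>

/* Value added to the nvgpu uapi by this change (bit 55 of the
 * characteristics flags). Redefined locally to keep the sketch
 * self-contained. */
#define NVGPU_GPU_FLAGS_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED (1ULL << 55)

/* chars_flags is assumed to be the flags word already obtained from the
 * existing GPU characteristics query. */
static bool implicit_errbar_supported(uint64_t chars_flags)
{
	return (chars_flags &
		NVGPU_GPU_FLAGS_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED) != 0ULL;
}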

View File

@@ -27,6 +27,7 @@
#include <nvgpu/os_sched.h>
#include <nvgpu/channel.h>
#include <nvgpu/tsg.h>
#include <nvgpu/atomic.h>
#include <nvgpu/rc.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/error_notifier.h>
@@ -273,6 +274,14 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg,
break;
}
}
while (nvgpu_atomic_read(&ch->sched_exit_wait_for_errbar_refcnt) > 0) {
err = nvgpu_tsg_set_sched_exit_wait_for_errbar(ch, false);
if (err != 0) {
nvgpu_err(g, "disable implicit ERRBAR failed ch:%u",
ch->chid);
break;
}
}
#endif
/* Remove channel from TSG and re-enable rest of the channels */
@@ -377,6 +386,14 @@ fail:
break;
}
}
while (nvgpu_atomic_read(&ch->sched_exit_wait_for_errbar_refcnt) > 0) {
err = nvgpu_tsg_set_sched_exit_wait_for_errbar(ch, false);
if (err != 0) {
nvgpu_err(g, "disable implicit ERRBAR failed ch:%u",
ch->chid);
break;
}
}
#endif
nvgpu_rwsem_down_write(&tsg->ch_list_lock);
@@ -1214,4 +1231,46 @@ int nvgpu_tsg_set_mmu_debug_mode(struct nvgpu_channel *ch, bool enable)
return err;
}
int nvgpu_tsg_set_sched_exit_wait_for_errbar(struct nvgpu_channel *ch, bool enable)
{
struct gk20a *g;
int err = 0;
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
if (tsg == NULL) {
return -EINVAL;
}
g = ch->g;
if (g->ops.gr.set_sched_wait_for_errbar == NULL) {
return -ENOSYS;
}
if (enable) {
nvgpu_atomic_inc(&ch->sched_exit_wait_for_errbar_refcnt);
nvgpu_atomic_inc(&tsg->sched_exit_wait_for_errbar_refcnt);
} else {
if (nvgpu_atomic_read(&ch->sched_exit_wait_for_errbar_refcnt) != 0) {
nvgpu_atomic_dec(&ch->sched_exit_wait_for_errbar_refcnt);
}
if (nvgpu_atomic_read(&tsg->sched_exit_wait_for_errbar_refcnt) != 0) {
nvgpu_atomic_dec(&tsg->sched_exit_wait_for_errbar_refcnt);
}
}
/*
* enable GPC implicit ERRBAR if it was requested for at
* least one channel in the TSG
*/
err = g->ops.gr.set_sched_wait_for_errbar(g, ch,
nvgpu_atomic_read(&tsg->sched_exit_wait_for_errbar_refcnt) > 0);
if (err != 0) {
nvgpu_err(g, "set implicit ERRBAR failed, err=%d", err);
return err;
}
return err;
}
#endif

View File

@@ -1112,6 +1112,28 @@ const u32 *ga10b_gr_get_hwpm_cau_init_data(u32 *count)
return hwpm_cau_init_data;
}
int ga10b_gr_set_sched_wait_for_errbar(struct gk20a *g,
struct nvgpu_channel *ch, bool enable)
{
struct nvgpu_dbg_reg_op ctx_ops = {
.op = REGOP(WRITE_32),
.type = REGOP(TYPE_GR_CTX),
.offset = gr_gpcs_pri_tpcs_sm_sch_macro_sched_r(),
.value_lo = enable ?
gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_enabled_f() :
gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_disabled_f(),
};
int err;
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
err = gr_gk20a_exec_ctx_ops(tsg, &ctx_ops, 1, 1, 0, &flags);
if (err != 0) {
nvgpu_err(g, "update implicit ERRBAR failed");
}
return err;
}
#endif /* CONFIG_NVGPU_DEBUGGER */
#ifdef CONFIG_NVGPU_HAL_NON_FUSA

View File

@@ -67,5 +67,7 @@ int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr,
u32 context_buffer_size,
u32 *priv_offset);
const u32 *ga10b_gr_get_hwpm_cau_init_data(u32 *count);
int ga10b_gr_set_sched_wait_for_errbar(struct gk20a *g,
struct nvgpu_channel *ch, bool enable);
#endif /* CONFIG_NVGPU_DEBUGGER */
#endif /* NVGPU_GR_GA10B_H */

View File

@@ -859,6 +859,7 @@ static const struct gops_gr ga100_ops_gr = {
gr_ga100_process_context_buffer_priv_segment,
.set_debug_mode = gm20b_gr_set_debug_mode,
.set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
.set_sched_wait_for_errbar = ga10b_gr_set_sched_wait_for_errbar,
.esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events,
.get_ctx_buffer_offsets = gr_gk20a_get_ctx_buffer_offsets,
.get_pm_ctx_buffer_offsets = gr_gk20a_get_pm_ctx_buffer_offsets,
@@ -1903,6 +1904,7 @@ int ga100_init_hal(struct gk20a *g)
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
nvgpu_set_enabled(g, NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED, true);
nvgpu_set_enabled(g, NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, true);
#endif
/*
* Tu104 has multiple async-LCE (3), GRCE (2) and PCE (4).

View File

@@ -867,6 +867,7 @@ static const struct gops_gr ga10b_ops_gr = {
gr_ga10b_process_context_buffer_priv_segment,
.set_debug_mode = gm20b_gr_set_debug_mode,
.set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
.set_sched_wait_for_errbar = ga10b_gr_set_sched_wait_for_errbar,
.esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events,
.get_ctx_buffer_offsets = gr_gk20a_get_ctx_buffer_offsets,
.get_pm_ctx_buffer_offsets = gr_gk20a_get_pm_ctx_buffer_offsets,
@@ -1932,6 +1933,7 @@ int ga10b_init_hal(struct gk20a *g)
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
nvgpu_set_enabled(g, NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED, true);
nvgpu_set_enabled(g, NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, true);
#endif
if (g->ops.pmu.is_pmu_supported(g)) {

View File

@@ -548,6 +548,10 @@ struct nvgpu_channel {
* MMU Debugger Mode is enabled for this channel if refcnt > 0
*/
u32 mmu_debug_mode_refcnt;
/**
* ERRBAR is enabled for this channel if refcnt > 0
*/
nvgpu_atomic_t sched_exit_wait_for_errbar_refcnt;
#endif
};

View File

@@ -228,6 +228,8 @@ struct gk20a;
"Emulate mode support"), \
DEFINE_FLAG(NVGPU_SUPPORT_PES_FS, \
"PES Floorsweeping"), \
DEFINE_FLAG(NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, \
"Implicit ERRBAR support"), \
DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"),
/**

View File

@@ -1367,6 +1367,8 @@ struct gops_gr {
u32 num_ppcs, u32 ppc_mask,
u32 *priv_offset);
void (*set_debug_mode)(struct gk20a *g, bool enable);
int (*set_sched_wait_for_errbar)(struct gk20a *g,
struct nvgpu_channel *ch, bool enable);
int (*set_mmu_debug_mode)(struct gk20a *g,
struct nvgpu_channel *ch, bool enable);
bool (*esr_bpt_pending_events)(u32 global_esr,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -1258,4 +1258,11 @@
#define gr_gpcs_tpcs_sm_l1tag_ctrl_surface_cut_collector_enable_f()\
(0x20000000U)
#define gr_gpc0_tpc0_sm_l1tag_ctrl_r() (0x005043f0U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_r() (0x00419b48U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_m()\
(U32(0x1U) << 20U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_enabled_f()\
(0x100000U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_disabled_f()\
(0x0U)
#endif

View File

@@ -1247,4 +1247,11 @@
#define gr_gpcs_tpcs_sm_l1tag_ctrl_surface_cut_collector_enable_f()\
(0x20000000U)
#define gr_gpc0_tpc0_sm_l1tag_ctrl_r() (0x005043f0U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_r() (0x00419b48U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_m()\
(U32(0x1U) << 20U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_enabled_f()\
(0x100000U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_disabled_f()\
(0x0U)
#endif

View File

@@ -210,6 +210,9 @@ struct nvgpu_tsg {
/** MMU debug mode enabled if mmu_debug_mode_refcnt > 0 */
u32 mmu_debug_mode_refcnt;
/** ERRBAR enabled if sched_exit_wait_for_errbar_refcnt > 0 */
nvgpu_atomic_t sched_exit_wait_for_errbar_refcnt;
/**
* Pointer to store SM errors read from h/w registers.
* Check #nvgpu_tsg_sm_error_state.
@@ -754,5 +757,6 @@ void nvgpu_tsg_reset_faulted_eng_pbdma(struct gk20a *g, struct nvgpu_tsg *tsg,
bool eng, bool pbdma);
#ifdef CONFIG_NVGPU_DEBUGGER
int nvgpu_tsg_set_mmu_debug_mode(struct nvgpu_channel *ch, bool enable);
int nvgpu_tsg_set_sched_exit_wait_for_errbar(struct nvgpu_channel *ch, bool enable);
#endif
#endif /* NVGPU_TSG_H */

View File

@@ -309,6 +309,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
NVGPU_SUPPORT_NVS},
{NVGPU_GPU_FLAGS_SUPPORT_NVS_SCHED_CTRL_FIFO,
NVGPU_SUPPORT_NVS_CTRL_FIFO},
{NVGPU_GPU_FLAGS_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED,
NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED},
};
static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)

View File

@@ -1235,6 +1235,51 @@ clean_up:
return err;
}
static int nvgpu_dbg_gpu_set_sched_wait_for_errbar(
struct dbg_session_gk20a *dbg_s,
struct nvgpu_sched_exit_wait_for_errbar_args *args)
{
int err;
struct gk20a *g = dbg_s->g;
struct nvgpu_channel *ch;
bool enable = (args->enable == NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_ENABLED);
u32 gr_instance_id =
nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id);
nvgpu_log_fn(g, "enable=%u", args->enable);
if (g->ops.gr.set_sched_wait_for_errbar == NULL) {
return -ENOSYS;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to poweron, err=%d", err);
return err;
}
/* Take the global lock, since we'll be doing global regops */
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
if (ch == NULL) {
nvgpu_err(g, "no bound channel for mmu debug mode");
err = -EINVAL;
goto clean_up;
}
err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
nvgpu_tsg_set_sched_exit_wait_for_errbar(ch, enable));
if (err) {
nvgpu_err(g, "set mmu debug mode failed, err=%d", err);
}
clean_up:
nvgpu_mutex_release(&g->dbg_sessions_lock);
gk20a_idle(g);
return err;
}
static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args)
@@ -2931,6 +2976,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
(struct nvgpu_dbg_gpu_va_access_args *)buf);
break;
case NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR:
err = nvgpu_dbg_gpu_set_sched_wait_for_errbar(dbg_s,
(struct nvgpu_sched_exit_wait_for_errbar_args *)buf);
break;
default:
nvgpu_err(g,
"unrecognized dbg gpu ioctl cmd: 0x%x",

View File

@@ -203,6 +203,8 @@ struct nvgpu_gpu_zbc_query_table_args {
#define NVGPU_GPU_FLAGS_SUPPORT_NVS (1ULL << 53)
/* The NVS control fifo interface is usable */
#define NVGPU_GPU_FLAGS_SUPPORT_NVS_SCHED_CTRL_FIFO (1ULL << 54)
/* Flag to indicate whether implicit ERRBAR is supported */
#define NVGPU_GPU_FLAGS_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED (1ULL << 55)
/* SM LRF ECC is enabled */
#define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60)
/* SM SHM ECC is enabled */

View File

@@ -621,8 +621,20 @@ struct nvgpu_dbg_gpu_va_access_args {
#define NVGPU_DBG_GPU_IOCTL_ACCESS_GPU_VA \
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 32, struct nvgpu_dbg_gpu_va_access_args)
/* Implicit ERRBAR Mode */
#define NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_DISABLED 0
#define NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_ENABLED 1
struct nvgpu_sched_exit_wait_for_errbar_args {
__u32 enable; /* enable 1, disable 0 */
};
#define NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR \
_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 33, \
struct nvgpu_sched_exit_wait_for_errbar_args)
#define NVGPU_DBG_GPU_IOCTL_LAST \
_IOC_NR(NVGPU_DBG_GPU_IOCTL_ACCESS_GPU_VA)
_IOC_NR(NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR)
#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)
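
A minimal usage sketch of the new ioctl itself (illustrative, not part of this
change), assuming dbg_fd is an open nvgpu debugger session with a channel
already bound (the handler returns -EINVAL otherwise) and that the updated
uapi definitions above are available via <linux/nvgpu.h>:

#include <stdbool.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed location of the updated nvgpu uapi header */

/* Toggle implicit ERRBAR for the channel bound to the debugger session. */
static int set_implicit_errbar(int dbg_fd, bool enable)
{
	struct nvgpu_sched_exit_wait_for_errbar_args args = {
		.enable = enable ?
			NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_ENABLED :
			NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_DISABLED,
	};

	return ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR, &args);
}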