gpu: nvgpu: add ioctl to configure implicit ERRBAR

Add ioctl support to configure implicit ERRBAR by setting/clearing the
EXIT_WAIT_FOR_ERRBAR field in the NV_PGRAPH_PRI_GPCS_TPCS_SM_SCH_MACRO_SCHED
register.

Add the GPU characteristics flag NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED
so that the userspace driver can determine whether the implicit ERRBAR ioctl
is supported.

Bug: 200782861

Change-Id: I530a4cf73bc5c844e8d73094d3e23949568fe335
Signed-off-by: atanand <atanand@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2718672
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
GVS: Gerrit_Virtual_Submit
Author: atanand
Date: 2022-05-26 09:34:15 +00:00
Committed by: mobile promotions
Parent: faf18009cb
Commit: eae4593343
15 changed files with 181 additions and 2 deletions
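
A hedged sketch (not part of this change) of the userspace side of the support
check: the new characteristics flag lets the userspace driver decide whether
the implicit ERRBAR ioctl can be used. The flag value below comes from this
change; how the characteristics flags word is read from the control node is
assumed and not shown.

#include <stdbool.h>
#include <stdint.h>

/* Value added to the nvgpu uapi by this change (bit 55 of the
 * characteristics flags). Redefined locally to keep the sketch
 * self-contained. */
#define NVGPU_GPU_FLAGS_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED (1ULL << 55)

/* chars_flags is assumed to be the flags word already obtained from the
 * existing GPU characteristics query. */
static bool implicit_errbar_supported(uint64_t chars_flags)
{
	return (chars_flags &
		NVGPU_GPU_FLAGS_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED) != 0ULL;
}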

View File

@@ -27,6 +27,7 @@
#include <nvgpu/os_sched.h>
#include <nvgpu/channel.h>
#include <nvgpu/tsg.h>
#include <nvgpu/atomic.h>
#include <nvgpu/rc.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/error_notifier.h>
@@ -273,6 +274,14 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg,
break;
}
}
while (nvgpu_atomic_read(&ch->sched_exit_wait_for_errbar_refcnt) > 0) {
err = nvgpu_tsg_set_sched_exit_wait_for_errbar(ch, false);
if (err != 0) {
nvgpu_err(g, "disable implicit ERRBAR failed ch:%u",
ch->chid);
break;
}
}
#endif
/* Remove channel from TSG and re-enable rest of the channels */
@@ -377,6 +386,14 @@ fail:
break;
}
}
while (nvgpu_atomic_read(&ch->sched_exit_wait_for_errbar_refcnt) > 0) {
err = nvgpu_tsg_set_sched_exit_wait_for_errbar(ch, false);
if (err != 0) {
nvgpu_err(g, "disable implicit ERRBAR failed ch:%u",
ch->chid);
break;
}
}
#endif
nvgpu_rwsem_down_write(&tsg->ch_list_lock);
@@ -1214,4 +1231,46 @@ int nvgpu_tsg_set_mmu_debug_mode(struct nvgpu_channel *ch, bool enable)
return err;
}
int nvgpu_tsg_set_sched_exit_wait_for_errbar(struct nvgpu_channel *ch, bool enable)
{
struct gk20a *g;
int err = 0;
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
if (tsg == NULL) {
return -EINVAL;
}
g = ch->g;
if (g->ops.gr.set_sched_wait_for_errbar == NULL) {
return -ENOSYS;
}
if (enable) {
nvgpu_atomic_inc(&ch->sched_exit_wait_for_errbar_refcnt);
nvgpu_atomic_inc(&tsg->sched_exit_wait_for_errbar_refcnt);
} else {
if (nvgpu_atomic_read(&ch->sched_exit_wait_for_errbar_refcnt) != 0) {
nvgpu_atomic_dec(&ch->sched_exit_wait_for_errbar_refcnt);
}
if (nvgpu_atomic_read(&tsg->sched_exit_wait_for_errbar_refcnt) != 0) {
nvgpu_atomic_dec(&tsg->sched_exit_wait_for_errbar_refcnt);
}
}
/*
* enable GPC implicit ERRBAR if it was requested for at
* least one channel in the TSG
*/
err = g->ops.gr.set_sched_wait_for_errbar(g, ch,
nvgpu_atomic_read(&tsg->sched_exit_wait_for_errbar_refcnt) > 0);
if (err != 0) {
nvgpu_err(g, "set implicit ERRBAR failed, err=%d", err);
return err;
}
return err;
}
#endif

View File

@@ -1112,6 +1112,28 @@ const u32 *ga10b_gr_get_hwpm_cau_init_data(u32 *count)
return hwpm_cau_init_data;
}
int ga10b_gr_set_sched_wait_for_errbar(struct gk20a *g,
struct nvgpu_channel *ch, bool enable)
{
struct nvgpu_dbg_reg_op ctx_ops = {
.op = REGOP(WRITE_32),
.type = REGOP(TYPE_GR_CTX),
.offset = gr_gpcs_pri_tpcs_sm_sch_macro_sched_r(),
.value_lo = enable ?
gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_enabled_f() :
gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_disabled_f(),
};
int err;
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
err = gr_gk20a_exec_ctx_ops(tsg, &ctx_ops, 1, 1, 0, &flags);
if (err != 0) {
nvgpu_err(g, "update implicit ERRBAR failed");
}
return err;
}
#endif /* CONFIG_NVGPU_DEBUGGER */
#ifdef CONFIG_NVGPU_HAL_NON_FUSA

View File

@@ -67,5 +67,7 @@ int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr,
u32 context_buffer_size,
u32 *priv_offset);
const u32 *ga10b_gr_get_hwpm_cau_init_data(u32 *count);
int ga10b_gr_set_sched_wait_for_errbar(struct gk20a *g,
struct nvgpu_channel *ch, bool enable);
#endif /* CONFIG_NVGPU_DEBUGGER */
#endif /* NVGPU_GR_GA10B_H */

View File

@@ -859,6 +859,7 @@ static const struct gops_gr ga100_ops_gr = {
gr_ga100_process_context_buffer_priv_segment,
.set_debug_mode = gm20b_gr_set_debug_mode,
.set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
.set_sched_wait_for_errbar = ga10b_gr_set_sched_wait_for_errbar,
.esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events,
.get_ctx_buffer_offsets = gr_gk20a_get_ctx_buffer_offsets,
.get_pm_ctx_buffer_offsets = gr_gk20a_get_pm_ctx_buffer_offsets,
@@ -1903,6 +1904,7 @@ int ga100_init_hal(struct gk20a *g)
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
nvgpu_set_enabled(g, NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED, true);
nvgpu_set_enabled(g, NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, true);
#endif
/*
* Tu104 has multiple async-LCE (3), GRCE (2) and PCE (4).

View File

@@ -867,6 +867,7 @@ static const struct gops_gr ga10b_ops_gr = {
gr_ga10b_process_context_buffer_priv_segment,
.set_debug_mode = gm20b_gr_set_debug_mode,
.set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
.set_sched_wait_for_errbar = ga10b_gr_set_sched_wait_for_errbar,
.esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events,
.get_ctx_buffer_offsets = gr_gk20a_get_ctx_buffer_offsets,
.get_pm_ctx_buffer_offsets = gr_gk20a_get_pm_ctx_buffer_offsets,
@@ -1932,6 +1933,7 @@ int ga10b_init_hal(struct gk20a *g)
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
nvgpu_set_enabled(g, NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED, true);
nvgpu_set_enabled(g, NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, true);
#endif
if (g->ops.pmu.is_pmu_supported(g)) {

View File

@@ -548,6 +548,10 @@ struct nvgpu_channel {
* MMU Debugger Mode is enabled for this channel if refcnt > 0
*/
u32 mmu_debug_mode_refcnt;
/**
* ERRBAR is enabled for this channel if refcnt > 0
*/
nvgpu_atomic_t sched_exit_wait_for_errbar_refcnt;
#endif
};

View File

@@ -228,6 +228,8 @@ struct gk20a;
"Emulate mode support"), \
DEFINE_FLAG(NVGPU_SUPPORT_PES_FS, \
"PES Floorsweeping"), \
DEFINE_FLAG(NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, \
"Implicit ERRBAR support"), \
DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"),
/**

View File

@@ -1367,6 +1367,8 @@ struct gops_gr {
u32 num_ppcs, u32 ppc_mask,
u32 *priv_offset);
void (*set_debug_mode)(struct gk20a *g, bool enable);
int (*set_sched_wait_for_errbar)(struct gk20a *g,
struct nvgpu_channel *ch, bool enable);
int (*set_mmu_debug_mode)(struct gk20a *g,
struct nvgpu_channel *ch, bool enable);
bool (*esr_bpt_pending_events)(u32 global_esr,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -1258,4 +1258,11 @@
#define gr_gpcs_tpcs_sm_l1tag_ctrl_surface_cut_collector_enable_f()\
(0x20000000U)
#define gr_gpc0_tpc0_sm_l1tag_ctrl_r() (0x005043f0U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_r() (0x00419b48U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_m()\
(U32(0x1U) << 20U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_enabled_f()\
(0x100000U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_disabled_f()\
(0x0U)
#endif

View File

@@ -1247,4 +1247,11 @@
#define gr_gpcs_tpcs_sm_l1tag_ctrl_surface_cut_collector_enable_f()\
(0x20000000U)
#define gr_gpc0_tpc0_sm_l1tag_ctrl_r() (0x005043f0U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_r() (0x00419b48U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_m()\
(U32(0x1U) << 20U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_enabled_f()\
(0x100000U)
#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_disabled_f()\
(0x0U)
#endif

View File

@@ -210,6 +210,9 @@ struct nvgpu_tsg {
/** MMU debug mode enabled if mmu_debug_mode_refcnt > 0 */
u32 mmu_debug_mode_refcnt;
/** ERRBAR enabled if sched_exit_wait_for_errbar_refcnt > 0 */
nvgpu_atomic_t sched_exit_wait_for_errbar_refcnt;
/**
* Pointer to store SM errors read from h/w registers.
* Check #nvgpu_tsg_sm_error_state.
@@ -754,5 +757,6 @@ void nvgpu_tsg_reset_faulted_eng_pbdma(struct gk20a *g, struct nvgpu_tsg *tsg,
bool eng, bool pbdma);
#ifdef CONFIG_NVGPU_DEBUGGER
int nvgpu_tsg_set_mmu_debug_mode(struct nvgpu_channel *ch, bool enable);
int nvgpu_tsg_set_sched_exit_wait_for_errbar(struct nvgpu_channel *ch, bool enable);
#endif
#endif /* NVGPU_TSG_H */

View File

@@ -309,6 +309,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
NVGPU_SUPPORT_NVS},
{NVGPU_GPU_FLAGS_SUPPORT_NVS_SCHED_CTRL_FIFO,
NVGPU_SUPPORT_NVS_CTRL_FIFO},
{NVGPU_GPU_FLAGS_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED,
NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED},
};
static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)

View File

@@ -1235,6 +1235,51 @@ clean_up:
return err;
}
static int nvgpu_dbg_gpu_set_sched_wait_for_errbar(
struct dbg_session_gk20a *dbg_s,
struct nvgpu_sched_exit_wait_for_errbar_args *args)
{
int err;
struct gk20a *g = dbg_s->g;
struct nvgpu_channel *ch;
bool enable = (args->enable == NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_ENABLED);
u32 gr_instance_id =
nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id);
nvgpu_log_fn(g, "enable=%u", args->enable);
if (g->ops.gr.set_sched_wait_for_errbar == NULL) {
return -ENOSYS;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to poweron, err=%d", err);
return err;
}
/* Take the global lock, since we'll be doing global regops */
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
if (ch == NULL) {
nvgpu_err(g, "no bound channel for mmu debug mode");
err = -EINVAL;
goto clean_up;
}
err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
nvgpu_tsg_set_sched_exit_wait_for_errbar(ch, enable));
if (err) {
nvgpu_err(g, "set mmu debug mode failed, err=%d", err);
}
clean_up:
nvgpu_mutex_release(&g->dbg_sessions_lock);
gk20a_idle(g);
return err;
}
static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args)
@@ -2931,6 +2976,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
(struct nvgpu_dbg_gpu_va_access_args *)buf);
break;
case NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR:
err = nvgpu_dbg_gpu_set_sched_wait_for_errbar(dbg_s,
(struct nvgpu_sched_exit_wait_for_errbar_args *)buf);
break;
default:
nvgpu_err(g,
"unrecognized dbg gpu ioctl cmd: 0x%x",

View File

@@ -203,6 +203,8 @@ struct nvgpu_gpu_zbc_query_table_args {
#define NVGPU_GPU_FLAGS_SUPPORT_NVS (1ULL << 53)
/* The NVS control fifo interface is usable */
#define NVGPU_GPU_FLAGS_SUPPORT_NVS_SCHED_CTRL_FIFO (1ULL << 54)
/* Flag to indicate whether implicit ERRBAR is supported */
#define NVGPU_GPU_FLAGS_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED (1ULL << 55)
/* SM LRF ECC is enabled */
#define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60)
/* SM SHM ECC is enabled */

View File

@@ -621,8 +621,20 @@ struct nvgpu_dbg_gpu_va_access_args {
#define NVGPU_DBG_GPU_IOCTL_ACCESS_GPU_VA \
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 32, struct nvgpu_dbg_gpu_va_access_args)
/* Implicit ERRBAR Mode */
#define NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_DISABLED 0
#define NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_ENABLED 1
struct nvgpu_sched_exit_wait_for_errbar_args {
__u32 enable; /* enable 1, disable 0 */
};
#define NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR \
_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 33, \
struct nvgpu_sched_exit_wait_for_errbar_args)
#define NVGPU_DBG_GPU_IOCTL_LAST \
_IOC_NR(NVGPU_DBG_GPU_IOCTL_ACCESS_GPU_VA)
_IOC_NR(NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR)
#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)
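
A minimal usage sketch of the new ioctl itself (illustrative, not part of this
change), assuming dbg_fd is an open nvgpu debugger session with a channel
already bound (the handler returns -EINVAL otherwise) and that the updated
uapi definitions above are available via <linux/nvgpu.h>:

#include <stdbool.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed location of the updated nvgpu uapi header */

/* Toggle implicit ERRBAR for the channel bound to the debugger session. */
static int set_implicit_errbar(int dbg_fd, bool enable)
{
	struct nvgpu_sched_exit_wait_for_errbar_args args = {
		.enable = enable ?
			NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_ENABLED :
			NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_DISABLED,
	};

	return ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR, &args);
}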