From eae459334320199374277de78f53b48a5bf979e6 Mon Sep 17 00:00:00 2001 From: atanand Date: Thu, 26 May 2022 09:34:15 +0000 Subject: [PATCH] gpu: nvgpu: add ioctl to configure implicit ERRBAR Add ioctl support to configure implicit ERRBAR by setting/unsetting NV_PGRAPH_PRI_GPCS_TPCS_SM_SCH_MACRO_SCHED register. Add gpu characteristics flag: NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED to allow userspace driver to determine if implicit ERRBAR ioctl is supported. Bug: 200782861 Change-Id: I530a4cf73bc5c844e8d73094d3e23949568fe335 Signed-off-by: atanand Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2718672 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-cert Reviewed-by: Sagar Kamble Reviewed-by: Vaibhav Kachore GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/common/fifo/tsg.c | 59 +++++++++++++++++++ drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c | 22 +++++++ drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.h | 2 + drivers/gpu/nvgpu/hal/init/hal_ga100.c | 2 + drivers/gpu/nvgpu/hal/init/hal_ga10b.c | 2 + drivers/gpu/nvgpu/include/nvgpu/channel.h | 4 ++ drivers/gpu/nvgpu/include/nvgpu/enabled.h | 2 + drivers/gpu/nvgpu/include/nvgpu/gops/gr.h | 2 + .../include/nvgpu/hw/ga100/hw_gr_ga100.h | 9 ++- .../include/nvgpu/hw/ga10b/hw_gr_ga10b.h | 7 +++ drivers/gpu/nvgpu/include/nvgpu/tsg.h | 4 ++ drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 2 + drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 50 ++++++++++++++++ include/uapi/linux/nvgpu-ctrl.h | 2 + include/uapi/linux/nvgpu.h | 14 ++++- 15 files changed, 181 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index 9b15ba78f..09e1cedb7 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -273,6 +274,14 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg, break; } } + while (nvgpu_atomic_read(&ch->sched_exit_wait_for_errbar_refcnt) > 0) { 
+ err = nvgpu_tsg_set_sched_exit_wait_for_errbar(ch, false); + if (err != 0) { + nvgpu_err(g, "disable implicit ERRBAR failed ch:%u", + ch->chid); + break; + } + } #endif /* Remove channel from TSG and re-enable rest of the channels */ @@ -377,6 +386,14 @@ fail: break; } } + while (nvgpu_atomic_read(&ch->sched_exit_wait_for_errbar_refcnt) > 0) { + err = nvgpu_tsg_set_sched_exit_wait_for_errbar(ch, false); + if (err != 0) { + nvgpu_err(g, "disable implicit ERRBAR failed ch:%u", + ch->chid); + break; + } + } #endif nvgpu_rwsem_down_write(&tsg->ch_list_lock); @@ -1214,4 +1231,46 @@ int nvgpu_tsg_set_mmu_debug_mode(struct nvgpu_channel *ch, bool enable) return err; } + +int nvgpu_tsg_set_sched_exit_wait_for_errbar(struct nvgpu_channel *ch, bool enable) +{ + struct gk20a *g; + int err = 0; + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + + if (tsg == NULL) { + return -EINVAL; + } + g = ch->g; + + if (g->ops.gr.set_sched_wait_for_errbar == NULL) { + return -ENOSYS; + } + + if (enable) { + nvgpu_atomic_inc(&ch->sched_exit_wait_for_errbar_refcnt); + nvgpu_atomic_inc(&tsg->sched_exit_wait_for_errbar_refcnt); + } else { + if (nvgpu_atomic_read(&ch->sched_exit_wait_for_errbar_refcnt) != 0) { + nvgpu_atomic_dec(&ch->sched_exit_wait_for_errbar_refcnt); + } + + if (nvgpu_atomic_read(&tsg->sched_exit_wait_for_errbar_refcnt) != 0) { + nvgpu_atomic_dec(&tsg->sched_exit_wait_for_errbar_refcnt); + } + } + + /* + * enable GPC implicit ERRBAR if it was requested for at + * least one channel in the TSG + */ + err = g->ops.gr.set_sched_wait_for_errbar(g, ch, + nvgpu_atomic_read(&tsg->sched_exit_wait_for_errbar_refcnt) > 0); + if (err != 0) { + nvgpu_err(g, "set implicit ERRBAR failed, err=%d", err); + return err; + } + + return err; +} #endif diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c index 7a250fda3..e15b88446 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c @@ -1112,6 +1112,28 @@ const 
u32 *ga10b_gr_get_hwpm_cau_init_data(u32 *count) return hwpm_cau_init_data; } +int ga10b_gr_set_sched_wait_for_errbar(struct gk20a *g, + struct nvgpu_channel *ch, bool enable) +{ + struct nvgpu_dbg_reg_op ctx_ops = { + .op = REGOP(WRITE_32), + .type = REGOP(TYPE_GR_CTX), + .offset = gr_gpcs_pri_tpcs_sm_sch_macro_sched_r(), + .value_lo = enable ? + gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_enabled_f() : + gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_disabled_f(), + }; + int err; + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE; + + err = gr_gk20a_exec_ctx_ops(tsg, &ctx_ops, 1, 1, 0, &flags); + if (err != 0) { + nvgpu_err(g, "update implicit ERRBAR failed"); + } + return err; +} + #endif /* CONFIG_NVGPU_DEBUGGER */ #ifdef CONFIG_NVGPU_HAL_NON_FUSA diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.h b/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.h index cf99e7f16..7a47af17d 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.h +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.h @@ -67,5 +67,7 @@ int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr, u32 context_buffer_size, u32 *priv_offset); const u32 *ga10b_gr_get_hwpm_cau_init_data(u32 *count); +int ga10b_gr_set_sched_wait_for_errbar(struct gk20a *g, + struct nvgpu_channel *ch, bool enable); #endif /* CONFIG_NVGPU_DEBUGGER */ #endif /* NVGPU_GR_GA10B_H */ diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga100.c b/drivers/gpu/nvgpu/hal/init/hal_ga100.c index dc404e509..a71a64b4f 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c @@ -859,6 +859,7 @@ static const struct gops_gr ga100_ops_gr = { gr_ga100_process_context_buffer_priv_segment, .set_debug_mode = gm20b_gr_set_debug_mode, .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode, + .set_sched_wait_for_errbar = ga10b_gr_set_sched_wait_for_errbar, .esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events, .get_ctx_buffer_offsets = 
gr_gk20a_get_ctx_buffer_offsets, .get_pm_ctx_buffer_offsets = gr_gk20a_get_pm_ctx_buffer_offsets, @@ -1903,6 +1904,7 @@ int ga100_init_hal(struct gk20a *g) #endif #ifdef CONFIG_NVGPU_DEBUGGER nvgpu_set_enabled(g, NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED, true); + nvgpu_set_enabled(g, NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, true); #endif /* * Tu104 has multiple async-LCE (3), GRCE (2) and PCE (4). diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c index ab0cb4c01..9a4fe4699 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c @@ -867,6 +867,7 @@ static const struct gops_gr ga10b_ops_gr = { gr_ga10b_process_context_buffer_priv_segment, .set_debug_mode = gm20b_gr_set_debug_mode, .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode, + .set_sched_wait_for_errbar = ga10b_gr_set_sched_wait_for_errbar, .esr_bpt_pending_events = gv11b_gr_esr_bpt_pending_events, .get_ctx_buffer_offsets = gr_gk20a_get_ctx_buffer_offsets, .get_pm_ctx_buffer_offsets = gr_gk20a_get_pm_ctx_buffer_offsets, @@ -1932,6 +1933,7 @@ int ga10b_init_hal(struct gk20a *g) #endif #ifdef CONFIG_NVGPU_DEBUGGER nvgpu_set_enabled(g, NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED, true); + nvgpu_set_enabled(g, NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, true); #endif if (g->ops.pmu.is_pmu_supported(g)) { diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 875b039d2..4cb7927e8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -548,6 +548,10 @@ struct nvgpu_channel { * MMU Debugger Mode is enabled for this channel if refcnt > 0 */ u32 mmu_debug_mode_refcnt; + /** + * ERRBAR is enabled for this channel if refcnt > 0 + */ + nvgpu_atomic_t sched_exit_wait_for_errbar_refcnt; #endif }; diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index e3bffe95e..155d41067 100644 --- 
a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -228,6 +228,8 @@ struct gk20a; "Emulate mode support"), \ DEFINE_FLAG(NVGPU_SUPPORT_PES_FS, \ "PES Floorsweeping"), \ + DEFINE_FLAG(NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, \ + "Implicit ERRBAR support"), \ DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"), /** diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h index 55d540a98..3f538f9b6 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h @@ -1367,6 +1367,8 @@ struct gops_gr { u32 num_ppcs, u32 ppc_mask, u32 *priv_offset); void (*set_debug_mode)(struct gk20a *g, bool enable); + int (*set_sched_wait_for_errbar)(struct gk20a *g, + struct nvgpu_channel *ch, bool enable); int (*set_mmu_debug_mode)(struct gk20a *g, struct nvgpu_channel *ch, bool enable); bool (*esr_bpt_pending_events)(u32 global_esr, diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_gr_ga100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_gr_ga100.h index ce7171e21..9e13acd05 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_gr_ga100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_gr_ga100.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -1258,4 +1258,11 @@ #define gr_gpcs_tpcs_sm_l1tag_ctrl_surface_cut_collector_enable_f()\ (0x20000000U) #define gr_gpc0_tpc0_sm_l1tag_ctrl_r() (0x005043f0U) +#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_r() (0x00419b48U) +#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_m()\ + (U32(0x1U) << 20U) +#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_enabled_f()\ + (0x100000U) +#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_disabled_f()\ + (0x0U) #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_gr_ga10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_gr_ga10b.h index 1739f5248..b6d9c5e71 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_gr_ga10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_gr_ga10b.h @@ -1247,4 +1247,11 @@ #define gr_gpcs_tpcs_sm_l1tag_ctrl_surface_cut_collector_enable_f()\ (0x20000000U) #define gr_gpc0_tpc0_sm_l1tag_ctrl_r() (0x005043f0U) +#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_r() (0x00419b48U) +#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_m()\ + (U32(0x1U) << 20U) +#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_enabled_f()\ + (0x100000U) +#define gr_gpcs_pri_tpcs_sm_sch_macro_sched_exit_wait_for_errbar_disabled_f()\ + (0x0U) #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h index 345b64f3e..8f8a6edf8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h @@ -210,6 +210,9 @@ struct nvgpu_tsg { /** MMU debug mode enabled if mmu_debug_mode_refcnt > 0 */ u32 mmu_debug_mode_refcnt; + /** ERRBAR enabled if sched_exit_wait_for_errbar_refcnt > 0 */ + nvgpu_atomic_t sched_exit_wait_for_errbar_refcnt; + /** * Pointer to store SM errors read from h/w registers. * Check #nvgpu_tsg_sm_error_state. 
@@ -754,5 +757,6 @@ void nvgpu_tsg_reset_faulted_eng_pbdma(struct gk20a *g, struct nvgpu_tsg *tsg, bool eng, bool pbdma); #ifdef CONFIG_NVGPU_DEBUGGER int nvgpu_tsg_set_mmu_debug_mode(struct nvgpu_channel *ch, bool enable); +int nvgpu_tsg_set_sched_exit_wait_for_errbar(struct nvgpu_channel *ch, bool enable); #endif #endif /* NVGPU_TSG_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index e1cb01ddd..4a8fc0359 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -309,6 +309,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = { NVGPU_SUPPORT_NVS}, {NVGPU_GPU_FLAGS_SUPPORT_NVS_SCHED_CTRL_FIFO, NVGPU_SUPPORT_NVS_CTRL_FIFO}, + {NVGPU_GPU_FLAGS_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED, + NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED}, }; static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index 06679747c..5f1e400ad 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ -1235,6 +1235,51 @@ clean_up: return err; } +static int nvgpu_dbg_gpu_set_sched_wait_for_errbar( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_sched_exit_wait_for_errbar_args *args) +{ + int err; + struct gk20a *g = dbg_s->g; + struct nvgpu_channel *ch; + bool enable = (args->enable == NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_ENABLED); + u32 gr_instance_id = + nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id); + + nvgpu_log_fn(g, "enable=%u", args->enable); + + if (g->ops.gr.set_sched_wait_for_errbar == NULL) { + return -ENOSYS; + } + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to poweron, err=%d", err); + return err; + } + + /* Take the global lock, since we'll be doing global regops */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (ch == NULL) { + nvgpu_err(g, "no bound 
channel for mmu debug mode"); + err = -EINVAL; + goto clean_up; + } + + err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id, + nvgpu_tsg_set_sched_exit_wait_for_errbar(ch, enable)); + if (err) { + nvgpu_err(g, "set mmu debug mode failed, err=%d", err); + } + +clean_up: + nvgpu_mutex_release(&g->dbg_sessions_lock); + gk20a_idle(g); + return err; +} + static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( struct dbg_session_gk20a *dbg_s, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) @@ -2931,6 +2976,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, (struct nvgpu_dbg_gpu_va_access_args *)buf); break; + case NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR: + err = nvgpu_dbg_gpu_set_sched_wait_for_errbar(dbg_s, + (struct nvgpu_sched_exit_wait_for_errbar_args *)buf); + break; + default: nvgpu_err(g, "unrecognized dbg gpu ioctl cmd: 0x%x", diff --git a/include/uapi/linux/nvgpu-ctrl.h b/include/uapi/linux/nvgpu-ctrl.h index 675e2fc0d..be649cd24 100644 --- a/include/uapi/linux/nvgpu-ctrl.h +++ b/include/uapi/linux/nvgpu-ctrl.h @@ -203,6 +203,8 @@ struct nvgpu_gpu_zbc_query_table_args { #define NVGPU_GPU_FLAGS_SUPPORT_NVS (1ULL << 53) /* The NVS control fifo interface is usable */ #define NVGPU_GPU_FLAGS_SUPPORT_NVS_SCHED_CTRL_FIFO (1ULL << 54) +/* Flag to indicate whether implicit ERRBAR is supported */ +#define NVGPU_GPU_FLAGS_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED (1ULL << 55) /* SM LRF ECC is enabled */ #define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60) /* SM SHM ECC is enabled */ diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 56ebbd9af..6650d37e5 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -621,8 +621,20 @@ struct nvgpu_dbg_gpu_va_access_args { #define NVGPU_DBG_GPU_IOCTL_ACCESS_GPU_VA \ _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 32, struct nvgpu_dbg_gpu_va_access_args) +/* Implicit ERRBAR Mode */ +#define NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_DISABLED 0 +#define 
NVGPU_DBG_GPU_SCHED_EXIT_WAIT_FOR_ERRBAR_ENABLED 1 + +struct nvgpu_sched_exit_wait_for_errbar_args { + __u32 enable; /* enable 1, disable 0*/ +}; + +#define NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR \ + _IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 33, \ + struct nvgpu_sched_exit_wait_for_errbar_args) + #define NVGPU_DBG_GPU_IOCTL_LAST \ - _IOC_NR(NVGPU_DBG_GPU_IOCTL_ACCESS_GPU_VA) + _IOC_NR(NVGPU_DBG_GPU_IOCTL_SET_SCHED_EXIT_WAIT_FOR_ERRBAR) #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \ sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)