mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
gpu: nvgpu: Add multi GR reset support for MIG
* Added multi GR reset/recovery support for MIG. * Added an API to get the GR engine id using the GR instance id. JIRA NVGPU-5650 JIRA NVGPU-5653 Change-Id: I12ece75a4c33f0944f404121b54879e814dda6df Signed-off-by: Lakshmanan M <lm@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2443644 Reviewed-by: automaticguardword <automaticguardword@nvidia.com> Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com> Reviewed-by: Rajesh Devaraj <rdevaraj@nvidia.com> Reviewed-by: Dinesh T <dt@nvidia.com> Reviewed-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit
This commit is contained in:
committed by
Alex Waterman
parent
613e1e704a
commit
883c12529a
@@ -92,20 +92,26 @@ bool nvgpu_engine_check_valid_id(struct gk20a *g, u32 engine_id)
|
||||
return f->host_engines[engine_id] != NULL;
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_get_gr_id(struct gk20a *g)
|
||||
u32 nvgpu_engine_get_gr_id_for_inst(struct gk20a *g, u32 inst_id)
|
||||
{
|
||||
const struct nvgpu_device *dev;
|
||||
/* Consider 1st available GR engine */
|
||||
|
||||
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
|
||||
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, inst_id);
|
||||
if (dev == NULL) {
|
||||
nvgpu_warn(g, "No GR devices on this GPU?!");
|
||||
nvgpu_warn(g, "No GR devices on this GPU for inst[%u]?!",
|
||||
inst_id);
|
||||
return NVGPU_INVALID_ENG_ID;
|
||||
}
|
||||
|
||||
return dev->engine_id;
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_get_gr_id(struct gk20a *g)
|
||||
{
|
||||
/* Consider 1st available GR engine */
|
||||
return nvgpu_engine_get_gr_id_for_inst(g, 0U);
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_act_interrupt_mask(struct gk20a *g, u32 engine_id)
|
||||
{
|
||||
const struct nvgpu_device *dev = NULL;
|
||||
|
||||
@@ -681,32 +681,70 @@ int nvgpu_gr_enable_hw(struct gk20a *g)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_ENGINE_RESET
|
||||
static int nvgpu_gr_enable_hw_for_instance(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "Enable GR%u HW",
|
||||
nvgpu_gr_get_cur_instance_id(g));
|
||||
|
||||
err = gr_reset_engine(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Gr Reset failed");
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_cg_init_gr_load_gating_prod(g);
|
||||
|
||||
/* Disable elcg until it gets enabled later in the init*/
|
||||
nvgpu_cg_elcg_disable_no_wait(g);
|
||||
|
||||
/** Enable interrupts at MC level */
|
||||
nvgpu_mc_intr_stall_unit_config(g, MC_INTR_UNIT_GR, MC_INTR_ENABLE);
|
||||
nvgpu_mc_intr_nonstall_unit_config(g, MC_INTR_UNIT_GR, MC_INTR_ENABLE);
|
||||
|
||||
err = gr_init_prepare_hw_impl(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "gr_init_prepare_hw_impl failed");
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_gr_reset(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
|
||||
struct nvgpu_mutex *fecs_mutex =
|
||||
nvgpu_gr_falcon_get_fecs_mutex(g->gr->falcon);
|
||||
nvgpu_gr_falcon_get_fecs_mutex(gr->falcon);
|
||||
|
||||
g->gr->initialized = false;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_rec, "Resetting GR");
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr | gpu_dbg_rec, "Resetting GR%u HW",
|
||||
nvgpu_gr_get_cur_instance_id(g));
|
||||
|
||||
nvgpu_mutex_acquire(fecs_mutex);
|
||||
|
||||
err = nvgpu_gr_enable_hw(g);
|
||||
err = nvgpu_gr_enable_hw_for_instance(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "nvgpu_gr_enable_hw_for_instance failed");
|
||||
nvgpu_mutex_release(fecs_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = gr_init_setup_hw(g, g->gr);
|
||||
err = gr_init_setup_hw(g, gr);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "gr_init_setup_hw failed");
|
||||
nvgpu_mutex_release(fecs_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_falcon_init_ctxsw(g, g->gr->falcon);
|
||||
err = nvgpu_gr_falcon_init_ctxsw(g, gr->falcon);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "nvgpu_gr_falcon_init_ctxsw failed");
|
||||
nvgpu_mutex_release(fecs_mutex);
|
||||
return err;
|
||||
}
|
||||
@@ -718,8 +756,9 @@ int nvgpu_gr_reset(struct gk20a *g)
|
||||
* ramchain, etc so this is hw init. Hence should be executed
|
||||
* for every GR engine HW initialization.
|
||||
*/
|
||||
err = nvgpu_gr_init_ctx_state(g, g->gr);
|
||||
err = nvgpu_gr_init_ctx_state(g, gr);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "nvgpu_gr_init_ctx_state failed");
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -727,6 +766,7 @@ int nvgpu_gr_reset(struct gk20a *g)
|
||||
if (g->can_elpg) {
|
||||
err = nvgpu_gr_falcon_bind_fecs_elpg(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "nvgpu_gr_falcon_bind_fecs_elpg failed");
|
||||
return err;
|
||||
}
|
||||
}
|
||||
@@ -738,7 +778,9 @@ int nvgpu_gr_reset(struct gk20a *g)
|
||||
|
||||
/* GR is initialized, signal possible waiters */
|
||||
g->gr->initialized = true;
|
||||
nvgpu_cond_signal(&g->gr->init_wq);
|
||||
nvgpu_cond_signal(&gr->init_wq);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <nvgpu/debug.h>
|
||||
#include <nvgpu/rc.h>
|
||||
#include <nvgpu/gr/gr.h>
|
||||
#include <nvgpu/gr/gr_instances.h>
|
||||
|
||||
void nvgpu_rc_fifo_recover(struct gk20a *g, u32 eng_bitmask,
|
||||
u32 hw_id, bool id_is_tsg,
|
||||
@@ -181,8 +182,13 @@ void nvgpu_rc_gr_fault(struct gk20a *g, struct nvgpu_tsg *tsg,
|
||||
#ifdef CONFIG_NVGPU_RECOVERY
|
||||
u32 gr_engine_id;
|
||||
u32 gr_eng_bitmask = 0U;
|
||||
u32 cur_gr_instance_id = nvgpu_gr_get_cur_instance_id(g);
|
||||
u32 inst_id = nvgpu_gr_get_syspipe_id(g, cur_gr_instance_id);
|
||||
|
||||
gr_engine_id = nvgpu_engine_get_gr_id(g);
|
||||
nvgpu_log(g, gpu_dbg_gr, "RC GR%u inst_id%u",
|
||||
cur_gr_instance_id, inst_id);
|
||||
|
||||
gr_engine_id = nvgpu_engine_get_gr_id_for_inst(g, inst_id);
|
||||
if (gr_engine_id != NVGPU_INVALID_ENG_ID) {
|
||||
gr_eng_bitmask = BIT32(gr_engine_id);
|
||||
} else {
|
||||
@@ -203,6 +209,7 @@ void nvgpu_rc_gr_fault(struct gk20a *g, struct nvgpu_tsg *tsg,
|
||||
#else
|
||||
WARN_ON(!g->sw_quiesce_pending);
|
||||
#endif
|
||||
nvgpu_log(g, gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
void nvgpu_rc_sched_error_bad_tsg(struct gk20a *g)
|
||||
|
||||
@@ -113,6 +113,18 @@ const struct nvgpu_device *nvgpu_engine_get_active_eng_info(
|
||||
* does not match with any of the engine ids supported by h/w.
|
||||
*/
|
||||
bool nvgpu_engine_check_valid_id(struct gk20a *g, u32 engine_id);
|
||||
/**
|
||||
* @brief Get h/w engine id based on engine's instance identification number
|
||||
* for the #NVGPU_ENGINE_GR engine enum type.
|
||||
*
|
||||
* @param g [in] The GPU driver struct.
|
||||
* @param inst_id [in] Engine's instance identification number.
|
||||
*
|
||||
* @return H/W engine id for #NVGPU_ENGINE_GR engine enum type.
|
||||
* @retval #NVGPU_INVALID_ENG_ID if #NVGPU_ENGINE_GR engine enum type could not
|
||||
* be found in the set of available h/w engine ids.
|
||||
*/
|
||||
u32 nvgpu_engine_get_gr_id_for_inst(struct gk20a *g, u32 inst_id);
|
||||
/**
|
||||
* @brief Get instance count and first available h/w engine id for
|
||||
* #NVGPU_ENGINE_GR engine enum type.
|
||||
|
||||
Reference in New Issue
Block a user