mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: vgpu: add clear single SM error state
Add support for clearing a single SM error state for the CUDA debugger.

In addition to clearing the local copy of the SM error state, vgpu_gr_clear_sm_error_state now sends a command (TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE) to the RM server to clear the global ESR and warp ESR.

Bug 1791111

Change-Id: I3a1f0644787fd900ec59a0e7974037d46a603487
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1296311
(cherry picked from commit fd07e03c3d086f396e4d65575c576a4dd68c920a)
Reviewed-on: http://git-master/r/1299060
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Cory Perry <cperry@nvidia.com>
Tested-by: Cory Perry <cperry@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
6c35cebdcb
commit
2caea7576a
@@ -1077,11 +1077,26 @@ static int vgpu_gr_clear_sm_error_state(struct gk20a *g,
|
|||||||
struct channel_gk20a *ch, u32 sm_id)
|
struct channel_gk20a *ch, u32 sm_id)
|
||||||
{
|
{
|
||||||
struct gr_gk20a *gr = &g->gr;
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
struct tegra_vgpu_cmd_msg msg;
|
||||||
|
struct tegra_vgpu_clear_sm_error_state *p =
|
||||||
|
&msg.params.clear_sm_error_state;
|
||||||
|
int err;
|
||||||
|
|
||||||
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
|
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
|
||||||
|
msg.cmd = TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE;
|
||||||
|
msg.handle = vgpu_get_handle(g);
|
||||||
|
p->handle = ch->virt_ctx;
|
||||||
|
p->sm_id = sm_id;
|
||||||
|
|
||||||
|
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
|
||||||
|
WARN_ON(err || msg.ret);
|
||||||
|
|
||||||
memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
|
memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
|
||||||
nvgpu_mutex_release(&g->dbg_sessions_lock);
|
nvgpu_mutex_release(&g->dbg_sessions_lock);
|
||||||
|
|
||||||
|
return err ? err : msg.ret;
|
||||||
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1099,8 +1114,8 @@ static int vgpu_gr_suspend_resume_contexts(struct gk20a *g,
|
|||||||
int channel_fd = -1;
|
int channel_fd = -1;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
mutex_lock(&g->dbg_sessions_lock);
|
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
|
||||||
mutex_lock(&dbg_s->ch_list_lock);
|
nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
|
||||||
|
|
||||||
n = 0;
|
n = 0;
|
||||||
list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry)
|
list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry)
|
||||||
@@ -1137,8 +1152,8 @@ static int vgpu_gr_suspend_resume_contexts(struct gk20a *g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
mutex_unlock(&dbg_s->ch_list_lock);
|
nvgpu_mutex_release(&dbg_s->ch_list_lock);
|
||||||
mutex_unlock(&g->dbg_sessions_lock);
|
nvgpu_mutex_release(&g->dbg_sessions_lock);
|
||||||
|
|
||||||
*ctx_resident_ch_fd = channel_fd;
|
*ctx_resident_ch_fd = channel_fd;
|
||||||
kfree(msg);
|
kfree(msg);
|
||||||
|
|||||||
@@ -101,6 +101,7 @@ enum {
|
|||||||
TEGRA_VGPU_CMD_GET_GPU_LOAD = 65,
|
TEGRA_VGPU_CMD_GET_GPU_LOAD = 65,
|
||||||
TEGRA_VGPU_CMD_SUSPEND_CONTEXTS = 66,
|
TEGRA_VGPU_CMD_SUSPEND_CONTEXTS = 66,
|
||||||
TEGRA_VGPU_CMD_RESUME_CONTEXTS = 67,
|
TEGRA_VGPU_CMD_RESUME_CONTEXTS = 67,
|
||||||
|
TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE = 68,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct tegra_vgpu_connect_params {
|
struct tegra_vgpu_connect_params {
|
||||||
@@ -462,6 +463,11 @@ struct tegra_vgpu_suspend_resume_contexts {
|
|||||||
u16 chids[];
|
u16 chids[];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct tegra_vgpu_clear_sm_error_state {
|
||||||
|
u64 handle;
|
||||||
|
u32 sm_id;
|
||||||
|
};
|
||||||
|
|
||||||
struct tegra_vgpu_cmd_msg {
|
struct tegra_vgpu_cmd_msg {
|
||||||
u32 cmd;
|
u32 cmd;
|
||||||
int ret;
|
int ret;
|
||||||
@@ -510,6 +516,7 @@ struct tegra_vgpu_cmd_msg {
|
|||||||
struct tegra_vgpu_gpu_load_params gpu_load;
|
struct tegra_vgpu_gpu_load_params gpu_load;
|
||||||
struct tegra_vgpu_suspend_resume_contexts suspend_contexts;
|
struct tegra_vgpu_suspend_resume_contexts suspend_contexts;
|
||||||
struct tegra_vgpu_suspend_resume_contexts resume_contexts;
|
struct tegra_vgpu_suspend_resume_contexts resume_contexts;
|
||||||
|
struct tegra_vgpu_clear_sm_error_state clear_sm_error_state;
|
||||||
char padding[192];
|
char padding[192];
|
||||||
} params;
|
} params;
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user