Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-24 10:34:43 +03:00)
gpu: nvgpu: refactor SET_SM_EXCEPTION_MASK ioctl
Added a HAL layer for SM exception mask handling, to take care of the virtualization case.

Jira VQRM-4806
Bug 200447406
Bug 2331747

Change-Id: Ia44778a2e41c1a508c48026b8dee285966f1a544
Signed-off-by: aalex <aalex@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1816284
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
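Before the hunks, a brief orientation: the refactor stops the debugger ioctl from writing tsg->sm_exception_mask_type directly and instead routes the request through a new fifo HAL operation, so the virtualized (vGPU) build can substitute an RPC-backed implementation. Below is a minimal sketch of the resulting dispatch, assuming the ops tables are populated as the hunks show; the helper function is hypothetical and not part of the commit:

	/*
	 * Hypothetical helper, for illustration only. The native HALs
	 * (gm20b, gp106, gv100, gv11b, and vGPU gp10b) install
	 * gk20a_tsg_set_sm_exception_type_mask here; vGPU gv11b installs
	 * vgpu_set_sm_exception_type_mask, which forwards the mask to the
	 * server over the tegra_vgpu RPC channel.
	 */
	static int example_set_sm_exception_mask(struct gk20a *g,
			struct channel_gk20a *ch, u32 mask)
	{
		return g->ops.fifo.set_sm_exception_type_mask(ch, mask);
	}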
@@ -367,6 +367,7 @@ void gk20a_tsg_release(struct nvgpu_ref *ref)
 	if (tsg->sm_error_states != NULL) {
 		nvgpu_kfree(g, tsg->sm_error_states);
 		tsg->sm_error_states = NULL;
+		nvgpu_mutex_destroy(&tsg->sm_exception_mask_lock);
 	}
 
 	/* unhook all events created on this TSG */
@@ -407,6 +408,11 @@ int gk20a_tsg_alloc_sm_error_states_mem(struct gk20a *g,
 	int err = 0;
 
 	if (tsg->sm_error_states != NULL) {
 		return -EINVAL;
 	}
 
+	err = nvgpu_mutex_init(&tsg->sm_exception_mask_lock);
+	if (err) {
+		return err;
+	}
+
@@ -415,6 +421,7 @@ int gk20a_tsg_alloc_sm_error_states_mem(struct gk20a *g,
 					* num_sm);
 	if (tsg->sm_error_states == NULL) {
 		nvgpu_err(g, "sm_error_states mem allocation failed");
+		nvgpu_mutex_destroy(&tsg->sm_exception_mask_lock);
 		err = -ENOMEM;
 	}
 
@@ -440,3 +447,20 @@ void gk20a_tsg_update_sm_error_state_locked(struct tsg_gk20a *tsg,
 	tsg_sm_error_states->hww_warp_esr_report_mask =
 			sm_error_state->hww_warp_esr_report_mask;
 }
+
+int gk20a_tsg_set_sm_exception_type_mask(struct channel_gk20a *ch,
+		u32 exception_mask)
+{
+	struct tsg_gk20a *tsg;
+
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg) {
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&tsg->sm_exception_mask_lock);
+	tsg->sm_exception_mask_type = exception_mask;
+	nvgpu_mutex_release(&tsg->sm_exception_mask_lock);
+
+	return 0;
+}

@@ -82,6 +82,7 @@ struct tsg_gk20a {
 #define NVGPU_SM_EXCEPTION_TYPE_MASK_NONE	(0x0U)
 #define NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL	(0x1U << 0)
 	u32 sm_exception_mask_type;
+	struct nvgpu_mutex sm_exception_mask_lock;
 };
 
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);
@@ -103,6 +104,8 @@ int gk20a_tsg_alloc_sm_error_states_mem(struct gk20a *g,
 void gk20a_tsg_update_sm_error_state_locked(struct tsg_gk20a *tsg,
 		u32 sm_id,
 		struct nvgpu_tsg_sm_error_state *sm_error_state);
+int gk20a_tsg_set_sm_exception_type_mask(struct channel_gk20a *ch,
+		u32 exception_mask);
 
 struct gk20a_event_id_data {
 	struct gk20a *g;

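The defines above make sm_exception_mask_type a bitmask in which only the FATAL bit is assigned so far, and the new sm_exception_mask_lock serializes updates from gk20a_tsg_set_sm_exception_type_mask against readers. A hypothetical consumer-side check, to make the semantics concrete (the real policy lives in the per-chip SM exception handlers, which this diff does not touch):

	/*
	 * Hypothetical reader: honor a debugger request to mask fatal SM
	 * exceptions instead of escalating to channel recovery.
	 */
	nvgpu_mutex_acquire(&tsg->sm_exception_mask_lock);
	if ((tsg->sm_exception_mask_type &
	     NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL) != 0U) {
		/* post the event to the debug session; skip the reset */
	}
	nvgpu_mutex_release(&tsg->sm_exception_mask_lock);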
@@ -477,6 +477,7 @@ static const struct gpu_ops gm20b_ops = {
 		.get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
 		.get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
 		.add_sema_cmd = gk20a_fifo_add_sema_cmd,
+		.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
 	},
 	.gr_ctx = {
 		.get_netlist_name = gr_gm20b_get_netlist_name,

@@ -552,6 +552,7 @@ static const struct gpu_ops gp106_ops = {
 		.get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
 		.get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
 		.add_sema_cmd = gk20a_fifo_add_sema_cmd,
+		.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
 	},
 	.gr_ctx = {
 		.get_netlist_name = gr_gp106_get_netlist_name,

@@ -649,6 +649,7 @@ static const struct gpu_ops gv100_ops = {
 		.get_sema_wait_cmd_size = gv11b_fifo_get_sema_wait_cmd_size,
 		.get_sema_incr_cmd_size = gv11b_fifo_get_sema_incr_cmd_size,
 		.add_sema_cmd = gv11b_fifo_add_sema_cmd,
+		.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
 	},
 	.gr_ctx = {
 		.get_netlist_name = gr_gv100_get_netlist_name,

@@ -612,6 +612,7 @@ static const struct gpu_ops gv11b_ops = {
 		.get_sema_wait_cmd_size = gv11b_fifo_get_sema_wait_cmd_size,
 		.get_sema_incr_cmd_size = gv11b_fifo_get_sema_incr_cmd_size,
 		.add_sema_cmd = gv11b_fifo_add_sema_cmd,
+		.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
 	},
 	.gr_ctx = {
 		.get_netlist_name = gr_gv11b_get_netlist_name,

@@ -748,6 +748,8 @@ struct gpu_ops {
 				struct nvgpu_semaphore *s, u64 sema_va,
 				struct priv_cmd_entry *cmd,
 				u32 off, bool acquire, bool wfi);
+		int (*set_sm_exception_type_mask)(struct channel_gk20a *ch,
+				u32 exception_mask);
 	} fifo;
 	struct pmu_v {
 		u32 (*get_pmu_cmdline_args_size)(struct nvgpu_pmu *pmu);

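Adding the function pointer to struct gpu_ops is what forces the one-line changes elsewhere: the four native HAL tables above (gm20b, gp106, gv100, gv11b) and the two vGPU tables further down each install an implementation for the new op.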
@@ -123,6 +123,7 @@ enum {
 	TEGRA_VGPU_CMD_RESUME = 83,
 	TEGRA_VGPU_CMD_GET_ECC_INFO = 84,
 	TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE = 85,
+	TEGRA_VGPU_CMD_SET_SM_EXCEPTION_TYPE_MASK = 86,
 };
 
 struct tegra_vgpu_connect_params {
@@ -467,6 +468,11 @@ struct tegra_vgpu_gpu_clk_rate_params {
 	u32 rate; /* in kHz */
 };
 
+struct tegra_vgpu_set_sm_exception_type_mask_params {
+	u64 handle;
+	u32 mask;
+};
+
 /* TEGRA_VGPU_MAX_ENGINES must be equal or greater than num_engines */
 #define TEGRA_VGPU_MAX_ENGINES 4
 struct tegra_vgpu_engines_info {
@@ -678,6 +684,7 @@ struct tegra_vgpu_cmd_msg {
 		struct tegra_vgpu_channel_update_pc_sampling update_pc_sampling;
 		struct tegra_vgpu_ecc_info_params ecc_info;
 		struct tegra_vgpu_ecc_counter_params ecc_counter;
+		struct tegra_vgpu_set_sm_exception_type_mask_params set_sm_exception_mask;
 		char padding[192];
 	} params;
 };

@@ -154,10 +154,6 @@ static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s);
 static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
 		struct file *filp, bool is_profiler);
 
-static int nvgpu_set_sm_exception_type_mask_locked(
-		struct dbg_session_gk20a *dbg_s,
-		u32 exception_mask);
-
 unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
 {
 	unsigned int mask = 0;
@@ -1808,44 +1804,13 @@ out:
 	return err;
 }
 
-static int nvgpu_set_sm_exception_type_mask_locked(
-		struct dbg_session_gk20a *dbg_s,
-		u32 exception_mask)
-{
-	struct gk20a *g = dbg_s->g;
-	int err = 0;
-	struct channel_gk20a *ch = NULL;
-
-	/*
-	 * Obtain the fisrt channel from the channel list in
-	 * dbg_session, find the context associated with channel
-	 * and set the sm_mask_type to that context
-	 */
-	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
-	if (ch != NULL) {
-		struct tsg_gk20a *tsg;
-
-		tsg = tsg_gk20a_from_ch(ch);
-		if (tsg != NULL) {
-			tsg->sm_exception_mask_type = exception_mask;
-			goto type_mask_end;
-		}
-	}
-
-	nvgpu_log_fn(g, "unable to find the TSG\n");
-	err = -EINVAL;
-
-type_mask_end:
-	return err;
-}
-
-static int nvgpu_dbg_gpu_set_sm_exception_type_mask(
-		struct dbg_session_gk20a *dbg_s,
+static int nvgpu_dbg_gpu_set_sm_exception_type_mask(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args *args)
 {
 	int err = 0;
 	struct gk20a *g = dbg_s->g;
 	u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
+	struct channel_gk20a *ch = NULL;
 
 	switch (args->exception_type_mask) {
 	case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL:
@@ -1866,10 +1831,13 @@ static int nvgpu_dbg_gpu_set_sm_exception_type_mask(
 		return err;
 	}
 
-	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-	err = nvgpu_set_sm_exception_type_mask_locked(dbg_s,
-			sm_exception_mask_type);
-	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+	if (ch != NULL) {
+		err = g->ops.fifo.set_sm_exception_type_mask(ch,
+				sm_exception_mask_type);
+	} else {
+		err = -EINVAL;
+	}
 
 	return err;
 }

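For completeness, a hypothetical userspace invocation of the reworked ioctl. The args struct and the FATAL flag appear in this diff; the request macro name and the debugger-session fd setup are assumed from nvgpu uapi conventions rather than quoted from the commit:

	/* Hypothetical usage; dbg_fd is an open nvgpu dbg-gpu session fd. */
	struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args args = {
		.exception_type_mask =
			NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL,
	};
	int ret = ioctl(dbg_fd,
			NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK, &args);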
@@ -61,5 +61,5 @@ int vgpu_tsg_bind_channel(struct tsg_gk20a *tsg,
 int vgpu_tsg_unbind_channel(struct channel_gk20a *ch);
 int vgpu_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
 int vgpu_enable_tsg(struct tsg_gk20a *tsg);
-
+int vgpu_set_sm_exception_type_mask(struct channel_gk20a *ch, u32 mask);
 #endif

@@ -358,6 +358,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
 		.get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
 		.add_sema_cmd = gk20a_fifo_add_sema_cmd,
+		.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
 	},
 	.gr_ctx = {
 		.get_netlist_name = gr_gp10b_get_netlist_name,

@@ -424,6 +424,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.get_sema_wait_cmd_size = gv11b_fifo_get_sema_wait_cmd_size,
 		.get_sema_incr_cmd_size = gv11b_fifo_get_sema_incr_cmd_size,
 		.add_sema_cmd = gv11b_fifo_add_sema_cmd,
+		.set_sm_exception_type_mask = vgpu_set_sm_exception_type_mask,
 	},
 	.gr_ctx = {
 		.get_netlist_name = gr_gv11b_get_netlist_name,

@@ -163,3 +163,26 @@ int vgpu_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
 
 	return err;
 }
+
+int vgpu_set_sm_exception_type_mask(struct channel_gk20a *ch,
+		u32 exception_mask)
+{
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_set_sm_exception_type_mask_params *p =
+				&msg.params.set_sm_exception_mask;
+	int err = 0;
+	struct gk20a *g = ch->g;
+
+	nvgpu_log_fn(g, " ");
+
+	msg.cmd = TEGRA_VGPU_CMD_SET_SM_EXCEPTION_TYPE_MASK;
+	msg.handle = vgpu_get_handle(g);
+	p->handle = ch->virt_ctx;
+	p->mask = exception_mask;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	WARN_ON(err);
+
+	return err;
+}
+
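A note on the RPC tail above: vgpu_comm_sendrecv reports transport failures while msg.ret carries the server-side status, so the line "err = err ? err : msg.ret;" lets a transport error take precedence, and the WARN_ON flags either kind of failure. This mirrors the pattern used by the other TSG vGPU calls in this file.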