gpu: nvgpu: refactor SET_SM_EXCEPTION_MASK ioctl

Added a HAL layer for SM exception mask handling to take
care of the virtualization case.

Jira VQRM-4806
Bug  200447406
Bug  2331747

Change-Id: Ia44778a2e41c1a508c48026b8dee285966f1a544
Signed-off-by: aalex <aalex@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1816284
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
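
In outline, the new call chain looks like this (a sketch assembled from the diffs below, using only names that appear in this change):

/*
 * NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK
 *   -> nvgpu_dbg_gpu_set_sm_exception_type_mask()
 *      -> g->ops.fifo.set_sm_exception_type_mask(ch, mask)
 *         native HALs: gk20a_tsg_set_sm_exception_type_mask()
 *                      (stores the mask in tsg->sm_exception_mask_type
 *                       under the new sm_exception_mask_lock)
 *         vgpu gv11b:  vgpu_set_sm_exception_type_mask()
 *                      (forwards the mask to the vgpu server via a
 *                       TEGRA_VGPU_CMD_SET_SM_EXCEPTION_TYPE_MASK RPC)
 */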

@@ -367,6 +367,7 @@ void gk20a_tsg_release(struct nvgpu_ref *ref)
if (tsg->sm_error_states != NULL) {
nvgpu_kfree(g, tsg->sm_error_states);
tsg->sm_error_states = NULL;
nvgpu_mutex_destroy(&tsg->sm_exception_mask_lock);
}
/* unhook all events created on this TSG */
@@ -407,6 +408,11 @@ int gk20a_tsg_alloc_sm_error_states_mem(struct gk20a *g,
int err = 0;
if (tsg->sm_error_states != NULL) {
return -EINVAL;
}
err = nvgpu_mutex_init(&tsg->sm_exception_mask_lock);
if (err) {
return err;
}
@@ -415,6 +421,7 @@ int gk20a_tsg_alloc_sm_error_states_mem(struct gk20a *g,
* num_sm);
if (tsg->sm_error_states == NULL) {
nvgpu_err(g, "sm_error_states mem allocation failed");
nvgpu_mutex_destroy(&tsg->sm_exception_mask_lock);
err = -ENOMEM;
}
@@ -440,3 +447,20 @@ void gk20a_tsg_update_sm_error_state_locked(struct tsg_gk20a *tsg,
tsg_sm_error_states->hww_warp_esr_report_mask =
sm_error_state->hww_warp_esr_report_mask;
}
int gk20a_tsg_set_sm_exception_type_mask(struct channel_gk20a *ch,
u32 exception_mask)
{
struct tsg_gk20a *tsg;
tsg = tsg_gk20a_from_ch(ch);
if (!tsg) {
return -EINVAL;
}
nvgpu_mutex_acquire(&tsg->sm_exception_mask_lock);
tsg->sm_exception_mask_type = exception_mask;
nvgpu_mutex_release(&tsg->sm_exception_mask_lock);
return 0;
}
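
The setter above only stores the mask under the new per-TSG lock; a consumer such as the SM exception handler would take the same lock when reading it. A hypothetical reader, shown only to illustrate the locking contract (not part of this change):

static bool tsg_sm_exceptions_masked_to_fatal(struct tsg_gk20a *tsg)
{
	bool fatal_masked;

	/* Pairs with gk20a_tsg_set_sm_exception_type_mask(): the mask is
	 * only read/written while sm_exception_mask_lock is held. */
	nvgpu_mutex_acquire(&tsg->sm_exception_mask_lock);
	fatal_masked = (tsg->sm_exception_mask_type &
			NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL) != 0U;
	nvgpu_mutex_release(&tsg->sm_exception_mask_lock);

	return fatal_masked;
}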

@@ -82,6 +82,7 @@ struct tsg_gk20a {
#define NVGPU_SM_EXCEPTION_TYPE_MASK_NONE (0x0U)
#define NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL (0x1U << 0)
u32 sm_exception_mask_type;
struct nvgpu_mutex sm_exception_mask_lock;
};
int gk20a_enable_tsg(struct tsg_gk20a *tsg);
@@ -103,6 +104,8 @@ int gk20a_tsg_alloc_sm_error_states_mem(struct gk20a *g,
void gk20a_tsg_update_sm_error_state_locked(struct tsg_gk20a *tsg,
u32 sm_id,
struct nvgpu_tsg_sm_error_state *sm_error_state);
int gk20a_tsg_set_sm_exception_type_mask(struct channel_gk20a *ch,
u32 exception_mask);
struct gk20a_event_id_data {
struct gk20a *g;

@@ -477,6 +477,7 @@ static const struct gpu_ops gm20b_ops = {
.get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
.get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
.add_sema_cmd = gk20a_fifo_add_sema_cmd,
.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
},
.gr_ctx = {
.get_netlist_name = gr_gm20b_get_netlist_name,

@@ -552,6 +552,7 @@ static const struct gpu_ops gp106_ops = {
.get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
.get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
.add_sema_cmd = gk20a_fifo_add_sema_cmd,
.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
},
.gr_ctx = {
.get_netlist_name = gr_gp106_get_netlist_name,

@@ -649,6 +649,7 @@ static const struct gpu_ops gv100_ops = {
.get_sema_wait_cmd_size = gv11b_fifo_get_sema_wait_cmd_size,
.get_sema_incr_cmd_size = gv11b_fifo_get_sema_incr_cmd_size,
.add_sema_cmd = gv11b_fifo_add_sema_cmd,
.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
},
.gr_ctx = {
.get_netlist_name = gr_gv100_get_netlist_name,

@@ -612,6 +612,7 @@ static const struct gpu_ops gv11b_ops = {
.get_sema_wait_cmd_size = gv11b_fifo_get_sema_wait_cmd_size,
.get_sema_incr_cmd_size = gv11b_fifo_get_sema_incr_cmd_size,
.add_sema_cmd = gv11b_fifo_add_sema_cmd,
.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
},
.gr_ctx = {
.get_netlist_name = gr_gv11b_get_netlist_name,

@@ -748,6 +748,8 @@ struct gpu_ops {
struct nvgpu_semaphore *s, u64 sema_va,
struct priv_cmd_entry *cmd,
u32 off, bool acquire, bool wfi);
int (*set_sm_exception_type_mask)(struct channel_gk20a *ch,
u32 exception_mask);
} fifo;
struct pmu_v {
u32 (*get_pmu_cmdline_args_size)(struct nvgpu_pmu *pmu);

@@ -123,6 +123,7 @@ enum {
TEGRA_VGPU_CMD_RESUME = 83,
TEGRA_VGPU_CMD_GET_ECC_INFO = 84,
TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE = 85,
TEGRA_VGPU_CMD_SET_SM_EXCEPTION_TYPE_MASK = 86,
};
struct tegra_vgpu_connect_params {
@@ -467,6 +468,11 @@ struct tegra_vgpu_gpu_clk_rate_params {
u32 rate; /* in kHz */
};
struct tegra_vgpu_set_sm_exception_type_mask_params {
u64 handle;
u32 mask;
};
/* TEGRA_VGPU_MAX_ENGINES must be equal or greater than num_engines */
#define TEGRA_VGPU_MAX_ENGINES 4
struct tegra_vgpu_engines_info {
@@ -678,6 +684,7 @@ struct tegra_vgpu_cmd_msg {
struct tegra_vgpu_channel_update_pc_sampling update_pc_sampling;
struct tegra_vgpu_ecc_info_params ecc_info;
struct tegra_vgpu_ecc_counter_params ecc_counter;
struct tegra_vgpu_set_sm_exception_type_mask_params set_sm_exception_mask;
char padding[192];
} params;
};
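
The params union carries a fixed char padding[192] member, which appears intended to cap the message size, so any new per-command params struct has to fit within it. The new u64 handle plus u32 mask fit trivially, but an illustrative compile-time guard (not part of this change) would be:

/* Illustrative only: keep new RPC params within the fixed 192-byte
 * padding of the tegra_vgpu_cmd_msg params union. */
_Static_assert(sizeof(struct tegra_vgpu_set_sm_exception_type_mask_params) <= 192,
	       "set_sm_exception_type_mask params exceed union padding");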

@@ -154,10 +154,6 @@ static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s);
static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
struct file *filp, bool is_profiler);
static int nvgpu_set_sm_exception_type_mask_locked(
struct dbg_session_gk20a *dbg_s,
u32 exception_mask);
unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
{
unsigned int mask = 0;
@@ -1808,44 +1804,13 @@ out:
return err;
}
static int nvgpu_set_sm_exception_type_mask_locked(
struct dbg_session_gk20a *dbg_s,
u32 exception_mask)
{
struct gk20a *g = dbg_s->g;
int err = 0;
struct channel_gk20a *ch = NULL;
/*
* Obtain the first channel from the channel list in
* dbg_session, find the context associated with the channel
* and set the sm_mask_type to that context
*/
ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
if (ch != NULL) {
struct tsg_gk20a *tsg;
tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
tsg->sm_exception_mask_type = exception_mask;
goto type_mask_end;
}
}
nvgpu_log_fn(g, "unable to find the TSG\n");
err = -EINVAL;
type_mask_end:
return err;
}
static int nvgpu_dbg_gpu_set_sm_exception_type_mask(struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args *args)
{
int err = 0;
struct gk20a *g = dbg_s->g;
u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
struct channel_gk20a *ch = NULL;
switch (args->exception_type_mask) {
case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL:
@@ -1866,10 +1831,13 @@ static int nvgpu_dbg_gpu_set_sm_exception_type_mask(
return err;
}
ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
if (ch != NULL) {
err = g->ops.fifo.set_sm_exception_type_mask(ch,
sm_exception_mask_type);
} else {
err = -EINVAL;
}
return err;
}
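
From user space the new path is reached through the existing debugger ioctl; only the in-kernel plumbing changed. A hypothetical caller sketch follows: the header install location is an assumption, the ioctl name is derived from the FATAL flag above, and dbg_fd must be a debug-session fd (e.g. opened on /dev/nvhost-dbg-gpu) with a channel already bound, since the handler resolves the session channel.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed install location of the nvgpu uapi */

/* Hypothetical caller: mask "fatal" SM exceptions on the channel bound
 * to an already-open debug session fd. */
int mask_fatal_sm_exceptions(int dbg_fd)
{
	struct nvgpu_dbg_gpu_set_sm_exception_type_mask_args args = {
		.exception_type_mask =
			NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL,
	};

	/* The handler maps the flag to NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL
	 * and routes it through g->ops.fifo.set_sm_exception_type_mask();
	 * with no bound channel it returns -EINVAL. */
	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK,
		  &args) != 0) {
		perror("set_sm_exception_type_mask");
		return -1;
	}
	return 0;
}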

@@ -61,5 +61,5 @@ int vgpu_tsg_bind_channel(struct tsg_gk20a *tsg,
int vgpu_tsg_unbind_channel(struct channel_gk20a *ch);
int vgpu_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
int vgpu_enable_tsg(struct tsg_gk20a *tsg);
int vgpu_set_sm_exception_type_mask(struct channel_gk20a *ch, u32 mask);
#endif

@@ -358,6 +358,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
.get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
.add_sema_cmd = gk20a_fifo_add_sema_cmd,
.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
},
.gr_ctx = {
.get_netlist_name = gr_gp10b_get_netlist_name,

@@ -424,6 +424,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.get_sema_wait_cmd_size = gv11b_fifo_get_sema_wait_cmd_size,
.get_sema_incr_cmd_size = gv11b_fifo_get_sema_incr_cmd_size,
.add_sema_cmd = gv11b_fifo_add_sema_cmd,
.set_sm_exception_type_mask = vgpu_set_sm_exception_type_mask,
},
.gr_ctx = {
.get_netlist_name = gr_gv11b_get_netlist_name,

@@ -163,3 +163,26 @@ int vgpu_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
return err;
}
int vgpu_set_sm_exception_type_mask(struct channel_gk20a *ch,
u32 exception_mask)
{
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_set_sm_exception_type_mask_params *p =
&msg.params.set_sm_exception_mask;
int err = 0;
struct gk20a *g = ch->g;
nvgpu_log_fn(g, " ");
msg.cmd = TEGRA_VGPU_CMD_SET_SM_EXCEPTION_TYPE_MASK;
msg.handle = vgpu_get_handle(g);
p->handle = ch->virt_ctx;
p->mask = exception_mask;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
err = err ? err : msg.ret;
WARN_ON(err);
return err;
}