mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: handle MMU fault for TSG
- add support for handling MMU faults on a channel in a TSG
- first get the ID and type of the context the engine is running
- if it is a TSG, abort each channel in it
- if it is a regular channel, abort that channel
- also add two versions of the set_ctx_mmu_error() API, one for a
  regular channel and another for a TSG

Bug 1470692

Change-Id: Ia7b01b81739598459702ed172180adb00e345eba
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/497874
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
committed by Dan Willemsen
parent e4a7bc1602
commit 2f232348e6
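
In outline, the reworked handler first resolves which context the faulting
engine was running (a TSG or a bare channel), then reports the error and
aborts accordingly. Below is a condensed sketch of that flow, pieced
together from the hunks that follow; engine-status decoding, fake faults,
and the deferred-reset path are elided, and the wrapper name is
hypothetical:

/* Hypothetical condensed sketch of the new per-context fault handling,
 * assuming ch/tsg were resolved from the engine status as in the diff. */
static bool handle_mmu_fault_ctx_sketch(struct gk20a *g,
		struct channel_gk20a *ch, struct tsg_gk20a *tsg)
{
	bool verbose = true;

	/* a channel that belongs to a TSG is handled as a TSG fault */
	if (ch && gk20a_is_channel_marked_as_tsg(ch))
		tsg = &g->fifo.tsg[ch->tsgid];

	if (tsg) {
		/* set the error notifier on every channel in the TSG,
		 * then abort each member under the channel-list lock */
		verbose = gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
		mutex_lock(&tsg->ch_list_lock);
		list_for_each_entry(ch, &tsg->ch_list, ch_entry)
			gk20a_channel_abort(ch);
		mutex_unlock(&tsg->ch_list_lock);
	} else if (ch) {
		/* regular channel: notify and abort just this channel */
		verbose = gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
		gk20a_channel_abort(ch);
	}

	return verbose;
}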
@@ -585,7 +585,7 @@ void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
 		ch->error_notifier->info32 = error;
 		ch->error_notifier->status = 0xffff;
 		gk20a_err(dev_from_gk20a(ch->g),
-			"error notifier set to %d\n", error);
+			"error notifier set to %d for ch %d\n", error, ch->hw_chid);
 	}
 }
 
@@ -911,14 +911,12 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
 }
 
 static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
-		struct channel_gk20a *ch) {
+		struct channel_gk20a *ch)
+{
 	bool verbose = true;
 	if (!ch)
 		return verbose;
 
-	gk20a_err(dev_from_gk20a(g),
-		"channel %d generated a mmu fault",
-		ch->hw_chid);
 	if (ch->error_notifier) {
 		u32 err = ch->error_notifier->info32;
 		if (ch->error_notifier->status == 0xffff) {
@@ -944,6 +942,31 @@ static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
 	return verbose;
 }
 
+static bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
+		struct channel_gk20a *ch)
+{
+	gk20a_err(dev_from_gk20a(g),
+		"channel %d generated a mmu fault", ch->hw_chid);
+
+	return gk20a_fifo_set_ctx_mmu_error(g, ch);
+}
+
+static bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
+		struct tsg_gk20a *tsg)
+{
+	bool ret = true;
+	struct channel_gk20a *ch = NULL;
+
+	gk20a_err(dev_from_gk20a(g),
+		"TSG %d generated a mmu fault", tsg->tsgid);
+
+	mutex_lock(&tsg->ch_list_lock);
+	list_for_each_entry(ch, &tsg->ch_list, ch_entry)
+		ret = gk20a_fifo_set_ctx_mmu_error(g, ch);
+	mutex_unlock(&tsg->ch_list_lock);
+
+	return ret;
+}
+
 static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
 {
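The two wrappers above keep the per-channel notifier logic in one place:
the _ch variant reports a single channel, while the _tsg variant logs the
TSG ID and applies the same per-channel routine to every member. A minimal
caller-side sketch, assuming the faulting context has already been resolved
(the dispatch helper name is hypothetical; the recovery paths later in this
change call the wrappers directly):

/* Hypothetical dispatch helper: report an MMU fault on whichever
 * context (TSG or bare channel) was identified, returning the
 * verbosity hint the callers use to decide whether to dump state. */
static bool report_ctx_mmu_error(struct gk20a *g,
		struct channel_gk20a *ch, struct tsg_gk20a *tsg)
{
	if (tsg)
		return gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
	if (ch)
		return gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
	return true;	/* nothing identified; keep default verbosity */
}

In the hunks below, gk20a_fifo_recover_ch() and gk20a_fifo_recover_tsg()
use the returned value to gate gk20a_debug_dump().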
@@ -987,6 +1010,7 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
 		u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id);
 		struct fifo_mmu_fault_info_gk20a f;
 		struct channel_gk20a *ch = NULL;
+		struct tsg_gk20a *tsg = NULL;
 
 		get_exception_mmu_fault_info(g, engine_mmu_id, &f);
 		trace_gk20a_mmu_fault(f.fault_hi_v,
@@ -1008,49 +1032,61 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
 			f.fault_type_v, f.fault_type_desc,
 			f.fault_info_v, f.inst_ptr);
 
-		/* get the channel */
+		/* get the channel/TSG */
 		if (fake_fault) {
 			/* read and parse engine status */
 			u32 status = gk20a_readl(g,
 				fifo_engine_status_r(engine_id));
 			u32 ctx_status =
 				fifo_engine_status_ctx_status_v(status);
-			bool type_ch = fifo_pbdma_status_id_type_v(status) ==
-				fifo_pbdma_status_id_type_chid_v();
 
 			/* use next_id if context load is failing */
 			u32 id = (ctx_status ==
 				fifo_engine_status_ctx_status_ctxsw_load_v()) ?
 				fifo_engine_status_next_id_v(status) :
 				fifo_engine_status_id_v(status);
+			u32 type = (ctx_status ==
+				fifo_engine_status_ctx_status_ctxsw_load_v()) ?
+				fifo_engine_status_next_id_type_v(status) :
+				fifo_engine_status_id_type_v(status);
 
-			if (type_ch) {
-				ch = g->fifo.channel + id;
-			} else {
-				gk20a_err(dev_from_gk20a(g), "non-chid type not supported");
-				WARN_ON(1);
-			}
+			if (type == fifo_engine_status_id_type_tsgid_v())
+				tsg = &g->fifo.tsg[id];
+			else if (type == fifo_engine_status_id_type_chid_v())
+				ch = &g->fifo.channel[id];
 		} else {
 			/* read channel based on instruction pointer */
 			ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
 		}
 
-		if (ch) {
+		if (ch && gk20a_is_channel_marked_as_tsg(ch))
+			tsg = &g->fifo.tsg[ch->tsgid];
+
 		/* check if engine reset should be deferred */
-		if (gk20a_fifo_should_defer_engine_reset(g, engine_id, &f, fake_fault)) {
+		if ((ch || tsg) && gk20a_fifo_should_defer_engine_reset(g,
+				engine_id, &f, fake_fault)) {
 			g->fifo.mmu_fault_engines = fault_id;
 
 			/* handled during channel free */
 			g->fifo.deferred_reset_pending = true;
-		} else
-			verbose = gk20a_fifo_set_ctx_mmu_error(g, ch);
-
-		if (ch->in_use) {
-			/* disable the channel from hw and increment
-			 * syncpoints */
-			gk20a_channel_abort(ch);
 		}
 
+		/* disable the channel/TSG from hw and increment
+		 * syncpoints */
+		if (tsg) {
+			struct channel_gk20a *ch = NULL;
+			if (!g->fifo.deferred_reset_pending)
+				verbose =
+					gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
+			mutex_lock(&tsg->ch_list_lock);
+			list_for_each_entry(ch, &tsg->ch_list, ch_entry)
+				gk20a_channel_abort(ch);
+			mutex_unlock(&tsg->ch_list_lock);
+		} else if (ch) {
+			if (!g->fifo.deferred_reset_pending)
+				verbose =
+					gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
+			gk20a_channel_abort(ch);
 		} else if (f.inst_ptr ==
 				g->mm.bar1.inst_block.cpu_pa) {
 			gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
@@ -1192,7 +1228,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
 
 		gk20a_channel_abort(ch);
 
-		if (gk20a_fifo_set_ctx_mmu_error(g, ch))
+		if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch))
 			gk20a_debug_dump(g->dev);
 	}
 }
@@ -1206,13 +1242,12 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
 		struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
 		struct channel_gk20a *ch;
 
-		mutex_lock(&tsg->ch_list_lock);
-		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
-			if (gk20a_fifo_set_ctx_mmu_error(g, ch))
-				gk20a_debug_dump(g->dev);
+		if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg))
+			gk20a_debug_dump(g->dev);
+
+		mutex_lock(&tsg->ch_list_lock);
+		list_for_each_entry(ch, &tsg->ch_list, ch_entry)
 			gk20a_channel_abort(ch);
-		}
 		mutex_unlock(&tsg->ch_list_lock);
 	}
 }