gpu: nvgpu: add gops.fifo.set_error_notifier

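Add a set_error_notifier op to gops.fifo, initialize it to
nvgpu_set_error_notifier in the chip HAL tables, and call it through
g->ops.fifo instead of calling nvgpu_set_error_notifier directly.
RM Server overrides this op for handling stall interrupts.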

Jira VQRM-3058

Change-Id: I8b14f073e952d19c808cb693958626b8d8aee8ca
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1679709
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Richard Zhao
2018-03-19 17:03:46 -07:00
committed by mobile promotions
parent d436ad67b6
commit 8d8ff9d34e
12 changed files with 39 additions and 21 deletions

View File

@@ -1393,7 +1393,7 @@ void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
{
nvgpu_err(g,
"channel %d generated a mmu fault", refch->chid);
nvgpu_set_error_notifier(refch,
g->ops.fifo.set_error_notifier(refch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
}
@@ -1938,7 +1938,8 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
channel_gk20a, ch_entry) {
if (gk20a_channel_get(ch_tsg)) {
nvgpu_set_error_notifier(ch_tsg, err_code);
g->ops.fifo.set_error_notifier(ch_tsg,
err_code);
gk20a_channel_put(ch_tsg);
}
}
@@ -1946,7 +1947,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
gk20a_fifo_recover_tsg(g, ch->tsgid, verbose);
} else {
nvgpu_set_error_notifier(ch, err_code);
g->ops.fifo.set_error_notifier(ch, err_code);
gk20a_fifo_recover_ch(g, ch->chid, verbose);
}
@@ -2108,7 +2109,7 @@ bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
*verbose = ch->timeout_debug_dump;
*ms = ch->timeout_accumulated_ms;
if (recover)
nvgpu_set_error_notifier(ch,
ch->g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
gk20a_channel_put(ch);
@@ -2172,7 +2173,7 @@ bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
nvgpu_list_for_each_entry(ch, &tsg->ch_list,
channel_gk20a, ch_entry) {
if (gk20a_channel_get(ch)) {
nvgpu_set_error_notifier(ch,
ch->g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
*verbose |= ch->timeout_debug_dump;
gk20a_channel_put(ch);
@@ -2487,7 +2488,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
struct channel_gk20a *ch = &f->channel[id];
if (gk20a_channel_get(ch)) {
nvgpu_set_error_notifier(ch, error_notifier);
g->ops.fifo.set_error_notifier(ch, error_notifier);
gk20a_fifo_recover_ch(g, id, true);
gk20a_channel_put(ch);
}
@@ -2500,7 +2501,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
nvgpu_list_for_each_entry(ch, &tsg->ch_list,
channel_gk20a, ch_entry) {
if (gk20a_channel_get(ch)) {
nvgpu_set_error_notifier(ch,
g->ops.fifo.set_error_notifier(ch,
error_notifier);
gk20a_channel_put(ch);
}
@@ -2662,7 +2663,7 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
channel_gk20a, ch_entry) {
if (!gk20a_channel_get(ch))
continue;
nvgpu_set_error_notifier(ch,
g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
gk20a_channel_put(ch);
}
@@ -2675,7 +2676,7 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
"preempt channel %d timeout", id);
if (gk20a_channel_get(ch)) {
nvgpu_set_error_notifier(ch,
g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
gk20a_fifo_recover_ch(g, id, true);
gk20a_channel_put(ch);

View File

@@ -635,6 +635,7 @@ struct gpu_ops {
bool *verbose, u32 *ms);
int (*channel_suspend)(struct gk20a *g);
int (*channel_resume)(struct gk20a *g);
void (*set_error_notifier)(struct channel_gk20a *ch, u32 error);
#ifdef CONFIG_TEGRA_GK20A_NVHOST
int (*alloc_syncpt_buf)(struct channel_gk20a *c,
u32 syncpt_id, struct nvgpu_mem *syncpt_buf);

View File

@@ -5154,14 +5154,14 @@ void gk20a_gr_set_error_notifier(struct gk20a *g,
nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
channel_gk20a, ch_entry) {
if (gk20a_channel_get(ch_tsg)) {
nvgpu_set_error_notifier(ch_tsg,
g->ops.fifo.set_error_notifier(ch_tsg,
error_notifier);
gk20a_channel_put(ch_tsg);
}
}
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
} else {
nvgpu_set_error_notifier(ch, error_notifier);
g->ops.fifo.set_error_notifier(ch, error_notifier);
}
}
}

View File

@@ -60,6 +60,7 @@
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/bus.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/hw/gm20b/hw_proj_gm20b.h>
#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
@@ -443,6 +444,7 @@ static const struct gpu_ops gm20b_ops = {
.check_ch_ctxsw_timeout = gk20a_fifo_check_ch_ctxsw_timeout,
.channel_suspend = gk20a_channel_suspend,
.channel_resume = gk20a_channel_resume,
.set_error_notifier = nvgpu_set_error_notifier,
#ifdef CONFIG_TEGRA_GK20A_NVHOST
.alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf,
.free_syncpt_buf = gk20a_fifo_free_syncpt_buf,

View File

@@ -89,6 +89,7 @@
#include <nvgpu/bus.h>
#include <nvgpu/enabled.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/hw/gp106/hw_proj_gp106.h>
#include <nvgpu/hw/gp106/hw_fifo_gp106.h>
@@ -504,6 +505,7 @@ static const struct gpu_ops gp106_ops = {
.check_ch_ctxsw_timeout = gk20a_fifo_check_ch_ctxsw_timeout,
.channel_suspend = gk20a_channel_suspend,
.channel_resume = gk20a_channel_resume,
.set_error_notifier = nvgpu_set_error_notifier,
#ifdef CONFIG_TEGRA_GK20A_NVHOST
.alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf,
.free_syncpt_buf = gk20a_fifo_free_syncpt_buf,

View File

@@ -71,6 +71,7 @@
#include <nvgpu/enabled.h>
#include <nvgpu/bus.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/hw/gp10b/hw_proj_gp10b.h>
#include <nvgpu/hw/gp10b/hw_fuse_gp10b.h>
@@ -475,6 +476,7 @@ static const struct gpu_ops gp10b_ops = {
.check_ch_ctxsw_timeout = gk20a_fifo_check_ch_ctxsw_timeout,
.channel_suspend = gk20a_channel_suspend,
.channel_resume = gk20a_channel_resume,
.set_error_notifier = nvgpu_set_error_notifier,
#ifdef CONFIG_TEGRA_GK20A_NVHOST
.alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf,
.free_syncpt_buf = gk20a_fifo_free_syncpt_buf,

View File

@@ -109,6 +109,7 @@
#include <nvgpu/debug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/hw/gv100/hw_proj_gv100.h>
#include <nvgpu/hw/gv100/hw_fifo_gv100.h>
@@ -516,6 +517,7 @@ static const struct gpu_ops gv100_ops = {
.check_ch_ctxsw_timeout = gk20a_fifo_check_ch_ctxsw_timeout,
.channel_suspend = gk20a_channel_suspend,
.channel_resume = gk20a_channel_resume,
.set_error_notifier = nvgpu_set_error_notifier,
#ifdef CONFIG_TEGRA_GK20A_NVHOST
.alloc_syncpt_buf = gv11b_fifo_alloc_syncpt_buf,
.free_syncpt_buf = gv11b_fifo_free_syncpt_buf,

View File

@@ -85,6 +85,7 @@
#include <nvgpu/debug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/hw/gv11b/hw_proj_gv11b.h>
#include <nvgpu/hw/gv11b/hw_fifo_gv11b.h>
@@ -533,6 +534,7 @@ static const struct gpu_ops gv11b_ops = {
.check_ch_ctxsw_timeout = gk20a_fifo_check_ch_ctxsw_timeout,
.channel_suspend = gk20a_channel_suspend,
.channel_resume = gk20a_channel_resume,
.set_error_notifier = nvgpu_set_error_notifier,
#ifdef CONFIG_TEGRA_GK20A_NVHOST
.alloc_syncpt_buf = gv11b_fifo_alloc_syncpt_buf,
.free_syncpt_buf = gv11b_fifo_free_syncpt_buf,

View File

@@ -643,7 +643,8 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
channel_gk20a, ch_entry) {
if (gk20a_channel_get(ch_tsg)) {
nvgpu_set_error_notifier(ch_tsg, err_code);
g->ops.fifo.set_error_notifier(ch_tsg,
err_code);
ch_tsg->has_timedout = true;
gk20a_channel_put(ch_tsg);
}
@@ -651,7 +652,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
} else {
nvgpu_set_error_notifier(ch, err_code);
g->ops.fifo.set_error_notifier(ch, err_code);
ch->has_timedout = true;
}
@@ -726,10 +727,11 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
switch (info->type) {
case TEGRA_VGPU_FIFO_INTR_PBDMA:
nvgpu_set_error_notifier(ch, NVGPU_ERR_NOTIFIER_PBDMA_ERROR);
g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_PBDMA_ERROR);
break;
case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT:
nvgpu_set_error_notifier(ch,
g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
break;
case TEGRA_VGPU_FIFO_INTR_MMU_FAULT:

View File

@@ -65,6 +65,7 @@
#include <nvgpu/enabled.h>
#include <nvgpu/vgpu/vgpu.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/hw/gp10b/hw_fuse_gp10b.h>
#include <nvgpu/hw/gp10b/hw_fifo_gp10b.h>
@@ -349,6 +350,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.check_ch_ctxsw_timeout = gk20a_fifo_check_ch_ctxsw_timeout,
.channel_suspend = gk20a_channel_suspend,
.channel_resume = gk20a_channel_resume,
.set_error_notifier = nvgpu_set_error_notifier,
#ifdef CONFIG_TEGRA_GK20A_NVHOST
.alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf,
.free_syncpt_buf = gk20a_fifo_free_syncpt_buf,

View File

@@ -926,30 +926,30 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
break;
case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
nvgpu_set_error_notifier(ch,
g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
break;
case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
nvgpu_set_error_notifier(ch,
g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
break;
case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
nvgpu_set_error_notifier(ch,
g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
break;
case TEGRA_VGPU_GR_INTR_FECS_ERROR:
break;
case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
nvgpu_set_error_notifier(ch,
g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
break;
case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
nvgpu_set_error_notifier(ch,
g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
break;
case TEGRA_VGPU_GR_INTR_EXCEPTION:
nvgpu_set_error_notifier(ch,
g->ops.fifo.set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
break;
case TEGRA_VGPU_GR_INTR_SM_EXCEPTION:

View File

@@ -23,6 +23,7 @@
#include <gk20a/gk20a.h>
#include <gv11b/hal_gv11b.h>
#include <nvgpu/vgpu/vgpu.h>
#include <nvgpu/error_notifier.h>
#include "vgpu/fifo_vgpu.h"
#include "vgpu/gr_vgpu.h"
@@ -392,6 +393,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.check_ch_ctxsw_timeout = gk20a_fifo_check_ch_ctxsw_timeout,
.channel_suspend = gk20a_channel_suspend,
.channel_resume = gk20a_channel_resume,
.set_error_notifier = nvgpu_set_error_notifier,
#ifdef CONFIG_TEGRA_GK20A_NVHOST
.alloc_syncpt_buf = vgpu_gv11b_fifo_alloc_syncpt_buf,
.free_syncpt_buf = gv11b_fifo_free_syncpt_buf,