Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: define error_notifiers in common code
All the Linux-specific error_notifier codes are defined in the Linux-specific header file <uapi/linux/nvgpu.h> and used throughout the common driver. Since they are defined in a Linux-specific file, all uses of those error_notifiers need to move into Linux-specific code only.

Hence define new error_notifiers in include/nvgpu/error_notifier.h and use them in the common code.

Add new API nvgpu_error_notifier_to_channel_notifier() to convert a common error_notifier of the form NVGPU_ERR_NOTIFIER_* to a Linux-specific error notifier of the form NVGPU_CHANNEL_*. Any future addition to the error notifiers requires updating both forms.

Move all error notifier related metadata from channel_gk20a (common code) to the Linux-specific structure nvgpu_channel_linux, and update all accesses to this data to use the new structure instead of channel_gk20a.

Move and rename the below APIs to a Linux-specific file and declare them in error_notifier.h:
nvgpu_set_error_notifier_locked()
nvgpu_set_error_notifier()
nvgpu_is_error_notifier_set()

Add the below new API and use it in fifo_vgpu.c:
nvgpu_set_error_notifier_if_empty()

Include <nvgpu/error_notifier.h> wherever the new error_notifier codes are used.

NVGPU-426

Change-Id: Iaa5bfc150e6e9ec17d797d445c2d6407afe9f4bd
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1593361
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Commit c6b9177cff (parent a0cea295e7), committed by mobile promotions.
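To make the "both forms" requirement from the commit message concrete, here is a hedged sketch of what a future addition would look like. NVGPU_ERR_NOTIFIER_EXAMPLE_FAULT and NVGPU_CHANNEL_EXAMPLE_FAULT are hypothetical names used only for illustration; the two real places to touch are the common enum in include/nvgpu/error_notifier.h and the nvgpu_error_notifier_to_channel_notifier() switch added in the Linux channel code below:

    /* include/nvgpu/error_notifier.h: add the common, OS-agnostic code
     * (hypothetical example entry)
     */
    NVGPU_ERR_NOTIFIER_EXAMPLE_FAULT,

    /* nvgpu_error_notifier_to_channel_notifier(): map it to the UAPI
     * value exposed to user space (also hypothetical)
     */
    case NVGPU_ERR_NOTIFIER_EXAMPLE_FAULT:
            return NVGPU_CHANNEL_EXAMPLE_FAULT;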
@@ -17,6 +17,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/ltc.h>
+#include <nvgpu/error_notifier.h>
 
 /*
  * This is required for nvgpu_vm_find_buf() which is used in the tracing
@@ -37,6 +38,124 @@
 #include <trace/events/gk20a.h>
 #include <uapi/linux/nvgpu.h>
 
+/*
+ * API to convert error_notifiers in common code and of the form
+ * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user
+ * space and of the form NVGPU_CHANNEL_*
+ */
+static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
+{
+        switch (error_notifier) {
+        case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
+                return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
+        case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD:
+                return NVGPU_CHANNEL_GR_ERROR_SW_METHOD;
+        case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
+                return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
+        case NVGPU_ERR_NOTIFIER_GR_EXCEPTION:
+                return NVGPU_CHANNEL_GR_EXCEPTION;
+        case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT:
+                return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
+        case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY:
+                return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY;
+        case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT:
+                return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT;
+        case NVGPU_ERR_NOTIFIER_PBDMA_ERROR:
+                return NVGPU_CHANNEL_PBDMA_ERROR;
+        case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD:
+                return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD;
+        case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR:
+                return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR;
+        case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH:
+                return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
+        }
+
+        pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier);
+
+        return error_notifier;
+}
+
+/**
+ * nvgpu_set_error_notifier_locked()
+ * Should be called with ch->error_notifier_mutex held
+ *
+ * error should be of the form NVGPU_ERR_NOTIFIER_*
+ */
+void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error)
+{
+        struct nvgpu_channel_linux *priv = ch->os_priv;
+
+        error = nvgpu_error_notifier_to_channel_notifier(error);
+
+        if (priv->error_notifier.dmabuf) {
+                struct nvgpu_notification *notification =
+                        priv->error_notifier.notification;
+                struct timespec time_data;
+                u64 nsec;
+
+                getnstimeofday(&time_data);
+                nsec = ((u64)time_data.tv_sec) * 1000000000u +
+                        (u64)time_data.tv_nsec;
+                notification->time_stamp.nanoseconds[0] =
+                        (u32)nsec;
+                notification->time_stamp.nanoseconds[1] =
+                        (u32)(nsec >> 32);
+                notification->info32 = error;
+                notification->status = 0xffff;
+
+                nvgpu_err(ch->g,
+                        "error notifier set to %d for ch %d", error, ch->chid);
+        }
+}
+
+/* error should be of the form NVGPU_ERR_NOTIFIER_* */
+void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error)
+{
+        struct nvgpu_channel_linux *priv = ch->os_priv;
+
+        nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+        nvgpu_set_error_notifier_locked(ch, error);
+        nvgpu_mutex_release(&priv->error_notifier.mutex);
+}
+
+void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error)
+{
+        struct nvgpu_channel_linux *priv = ch->os_priv;
+
+        nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+        if (priv->error_notifier.dmabuf) {
+                struct nvgpu_notification *notification =
+                        priv->error_notifier.notification;
+
+                /* Don't overwrite error flag if it is already set */
+                if (notification->status != 0xffff)
+                        nvgpu_set_error_notifier_locked(ch, error);
+        }
+        nvgpu_mutex_release(&priv->error_notifier.mutex);
+}
+
+/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */
+bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier)
+{
+        struct nvgpu_channel_linux *priv = ch->os_priv;
+        bool notifier_set = false;
+
+        error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier);
+
+        nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+        if (priv->error_notifier.dmabuf) {
+                struct nvgpu_notification *notification =
+                        priv->error_notifier.notification;
+                u32 err = notification->info32;
+
+                if (err == error_notifier)
+                        notifier_set = true;
+        }
+        nvgpu_mutex_release(&priv->error_notifier.mutex);
+
+        return notifier_set;
+}
+
 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
 {
         struct nvgpu_channel_completion_cb *completion_cb =
@@ -128,6 +247,7 @@ static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
 static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
 {
         struct nvgpu_channel_linux *priv;
+        int err;
 
         priv = nvgpu_kzalloc(g, sizeof(*priv));
         if (!priv)
@@ -136,6 +256,12 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
         ch->os_priv = priv;
         priv->ch = ch;
 
+        err = nvgpu_mutex_init(&priv->error_notifier.mutex);
+        if (err) {
+                nvgpu_kfree(g, priv);
+                return err;
+        }
+
         nvgpu_channel_work_completion_init(ch);
 
         return 0;
@@ -143,7 +269,10 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
 
 static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
 {
-        nvgpu_kfree(g, ch->os_priv);
+        struct nvgpu_channel_linux *priv = ch->os_priv;
+
+        nvgpu_mutex_destroy(&priv->error_notifier.mutex);
+        nvgpu_kfree(g, priv);
 }
 
 int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
@@ -42,10 +42,20 @@ struct nvgpu_channel_completion_cb {
         struct work_struct work;
 };
 
+struct nvgpu_error_notifier {
+        struct dma_buf *dmabuf;
+        void *vaddr;
+
+        struct nvgpu_notification *notification;
+
+        struct nvgpu_mutex mutex;
+};
+
 struct nvgpu_channel_linux {
         struct channel_gk20a *ch;
 
         struct nvgpu_channel_completion_cb completion_cb;
+        struct nvgpu_error_notifier error_notifier;
 };
 
 int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
@@ -30,6 +30,7 @@
 #include <nvgpu/list.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/error_notifier.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
@@ -227,15 +228,17 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
 
 static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch)
 {
-        nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-        if (ch->error_notifier_ref) {
-                dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
-                dma_buf_put(ch->error_notifier_ref);
-                ch->error_notifier_ref = NULL;
-                ch->error_notifier = NULL;
-                ch->error_notifier_va = NULL;
+        struct nvgpu_channel_linux *priv = ch->os_priv;
+
+        nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+        if (priv->error_notifier.dmabuf) {
+                dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr);
+                dma_buf_put(priv->error_notifier.dmabuf);
+                priv->error_notifier.dmabuf = NULL;
+                priv->error_notifier.notification = NULL;
+                priv->error_notifier.vaddr = NULL;
         }
-        nvgpu_mutex_release(&ch->error_notifier_mutex);
+        nvgpu_mutex_release(&priv->error_notifier.mutex);
 }
 
 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
@@ -244,6 +247,7 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
         struct dma_buf *dmabuf;
         void *va;
         u64 end = args->offset + sizeof(struct nvgpu_notification);
+        struct nvgpu_channel_linux *priv = ch->os_priv;
 
         if (!args->mem) {
                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
@@ -273,14 +277,15 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
                 return -ENOMEM;
         }
 
-        ch->error_notifier = va + args->offset;
-        ch->error_notifier_va = va;
-        memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
+        priv->error_notifier.notification = va + args->offset;
+        priv->error_notifier.vaddr = va;
+        memset(priv->error_notifier.notification, 0,
+                sizeof(struct nvgpu_notification));
 
         /* set channel notifiers pointer */
-        nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-        ch->error_notifier_ref = dmabuf;
-        nvgpu_mutex_release(&ch->error_notifier_mutex);
+        nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+        priv->error_notifier.dmabuf = dmabuf;
+        nvgpu_mutex_release(&priv->error_notifier.mutex);
 
         return 0;
 }
@@ -1361,7 +1366,7 @@ long gk20a_channel_ioctl(struct file *filp,
                         break;
                 }
                 err = ch->g->ops.fifo.force_reset_ch(ch,
-                        NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR, true);
+                        NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true);
                 gk20a_idle(ch->g);
                 break;
         case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL:
@@ -25,10 +25,13 @@
 #include <nvgpu/atomic.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/barrier.h>
+#include <nvgpu/error_notifier.h>
 
 #include "vgpu.h"
 #include "fifo_vgpu.h"
 
+#include "common/linux/channel.h"
+
 #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
 
@@ -691,7 +694,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
 
                 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
                         if (gk20a_channel_get(ch_tsg)) {
-                                gk20a_set_error_notifier(ch_tsg, err_code);
+                                nvgpu_set_error_notifier(ch_tsg, err_code);
                                 ch_tsg->has_timedout = true;
                                 gk20a_channel_put(ch_tsg);
                         }
@@ -699,7 +702,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
 
                 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
         } else {
-                gk20a_set_error_notifier(ch, err_code);
+                nvgpu_set_error_notifier(ch, err_code);
                 ch->has_timedout = true;
         }
 
@@ -716,19 +719,14 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
 static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
                 struct channel_gk20a *ch)
 {
-        nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-        if (ch->error_notifier_ref) {
-                if (ch->error_notifier->status == 0xffff) {
-                        /* If error code is already set, this mmu fault
-                         * was triggered as part of recovery from other
-                         * error condition.
-                         * Don't overwrite error flag. */
-                } else {
-                        gk20a_set_error_notifier_locked(ch,
-                                NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
-                }
-        }
-        nvgpu_mutex_release(&ch->error_notifier_mutex);
+        /*
+         * If error code is already set, this mmu fault
+         * was triggered as part of recovery from other
+         * error condition.
+         * Don't overwrite error flag.
+         */
+        nvgpu_set_error_notifier_if_empty(ch,
+                        NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
 
         /* mark channel as faulted */
         ch->has_timedout = true;
@@ -778,11 +776,11 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
 
         switch (info->type) {
         case TEGRA_VGPU_FIFO_INTR_PBDMA:
-                gk20a_set_error_notifier(ch, NVGPU_CHANNEL_PBDMA_ERROR);
+                nvgpu_set_error_notifier(ch, NVGPU_ERR_NOTIFIER_PBDMA_ERROR);
                 break;
         case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT:
-                gk20a_set_error_notifier(ch,
-                        NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+                nvgpu_set_error_notifier(ch,
+                        NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
                 break;
         case TEGRA_VGPU_FIFO_INTR_MMU_FAULT:
                 vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch);
@@ -20,6 +20,7 @@
 
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/error_notifier.h>
 
 #include "vgpu.h"
 #include "gr_vgpu.h"
@@ -941,31 +942,31 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
                 nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
                 break;
         case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
-                gk20a_set_error_notifier(ch,
-                                NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
+                nvgpu_set_error_notifier(ch,
+                                NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
                 break;
         case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
-                gk20a_set_error_notifier(ch,
-                                NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
+                nvgpu_set_error_notifier(ch,
+                                NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
         case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
                 break;
         case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
-                gk20a_set_error_notifier(ch,
-                                NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+                nvgpu_set_error_notifier(ch,
+                                NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
                 break;
         case TEGRA_VGPU_GR_INTR_FECS_ERROR:
                 break;
         case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
-                gk20a_set_error_notifier(ch,
-                                NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+                nvgpu_set_error_notifier(ch,
+                                NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
                 break;
         case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
-                gk20a_set_error_notifier(ch,
-                                NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+                nvgpu_set_error_notifier(ch,
+                                NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
                 break;
         case TEGRA_VGPU_GR_INTR_EXCEPTION:
-                gk20a_set_error_notifier(ch,
-                                NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+                nvgpu_set_error_notifier(ch,
+                                NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
                 break;
         case TEGRA_VGPU_GR_INTR_SM_EXCEPTION:
                 gk20a_dbg_gpu_post_events(ch);
@@ -44,6 +44,7 @@
 #include <nvgpu/ltc.h>
 #include <nvgpu/barrier.h>
 #include <nvgpu/ctxsw_trace.h>
+#include <nvgpu/error_notifier.h>
 
 #include "gk20a.h"
 #include "dbg_gpu_gk20a.h"
@@ -339,37 +340,6 @@ int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
         return ret ? ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true);
 }
 
-/**
- * gk20a_set_error_notifier_locked()
- * Should be called with ch->error_notifier_mutex held
- */
-void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error)
-{
-        if (ch->error_notifier_ref) {
-                struct timespec time_data;
-                u64 nsec;
-                getnstimeofday(&time_data);
-                nsec = ((u64)time_data.tv_sec) * 1000000000u +
-                                (u64)time_data.tv_nsec;
-                ch->error_notifier->time_stamp.nanoseconds[0] =
-                                (u32)nsec;
-                ch->error_notifier->time_stamp.nanoseconds[1] =
-                                (u32)(nsec >> 32);
-                ch->error_notifier->info32 = error;
-                ch->error_notifier->status = 0xffff;
-
-                nvgpu_err(ch->g,
-                        "error notifier set to %d for ch %d", error, ch->chid);
-        }
-}
-
-void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
-{
-        nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-        gk20a_set_error_notifier_locked(ch, error);
-        nvgpu_mutex_release(&ch->error_notifier_mutex);
-}
-
 static void gk20a_wait_until_counter_is_N(
         struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
         struct nvgpu_cond *c, const char *caller, const char *counter_name)
@@ -1550,7 +1520,7 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
                 gk20a_gr_debug_dump(g);
 
         g->ops.fifo.force_reset_ch(ch,
-                NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
+                NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, true);
 }
 
 /**
@@ -2210,53 +2180,48 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
         err = nvgpu_mutex_init(&c->ioctl_lock);
         if (err)
                 return err;
-        err = nvgpu_mutex_init(&c->error_notifier_mutex);
-        if (err)
-                goto fail_1;
         err = nvgpu_mutex_init(&c->joblist.cleanup_lock);
         if (err)
-                goto fail_2;
+                goto fail_1;
         err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
         if (err)
-                goto fail_3;
+                goto fail_2;
         err = nvgpu_mutex_init(&c->sync_lock);
         if (err)
-                goto fail_4;
+                goto fail_3;
 #if defined(CONFIG_GK20A_CYCLE_STATS)
         err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
         if (err)
-                goto fail_5;
+                goto fail_4;
         err = nvgpu_mutex_init(&c->cs_client_mutex);
         if (err)
-                goto fail_6;
+                goto fail_5;
 #endif
         err = nvgpu_mutex_init(&c->event_id_list_lock);
         if (err)
-                goto fail_7;
+                goto fail_6;
         err = nvgpu_mutex_init(&c->dbg_s_lock);
         if (err)
-                goto fail_8;
+                goto fail_7;
 
         nvgpu_list_add(&c->free_chs, &g->fifo.free_chs);
 
         return 0;
 
-fail_8:
-        nvgpu_mutex_destroy(&c->event_id_list_lock);
 fail_7:
+        nvgpu_mutex_destroy(&c->event_id_list_lock);
+fail_6:
 #if defined(CONFIG_GK20A_CYCLE_STATS)
         nvgpu_mutex_destroy(&c->cs_client_mutex);
-fail_6:
-        nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
 fail_5:
+        nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
+fail_4:
 #endif
         nvgpu_mutex_destroy(&c->sync_lock);
-fail_4:
-        nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
 fail_3:
-        nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
+        nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
 fail_2:
-        nvgpu_mutex_destroy(&c->error_notifier_mutex);
+        nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
 fail_1:
         nvgpu_mutex_destroy(&c->ioctl_lock);
 
@@ -273,11 +273,6 @@ struct channel_gk20a {
         bool timeout_debug_dump;
         unsigned int timeslice_us;
 
-        struct dma_buf *error_notifier_ref;
-        struct nvgpu_notification *error_notifier;
-        void *error_notifier_va;
-        struct nvgpu_mutex error_notifier_mutex;
-
         struct nvgpu_mutex sync_lock;
         struct gk20a_channel_sync *sync;
 
@@ -335,8 +330,6 @@ bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
 void gk20a_disable_channel(struct channel_gk20a *ch);
 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
 void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
-void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
-void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error);
 void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
                         struct priv_cmd_entry *entry);
@@ -39,6 +39,7 @@
 #include <nvgpu/nvhost.h>
 #include <nvgpu/barrier.h>
 #include <nvgpu/ctxsw_trace.h>
+#include <nvgpu/error_notifier.h>
 
 #include "gk20a.h"
 #include "mm_gk20a.h"
@@ -557,7 +558,6 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
                 nvgpu_mutex_destroy(&tsg->event_id_list_lock);
 
                 nvgpu_mutex_destroy(&c->ioctl_lock);
-                nvgpu_mutex_destroy(&c->error_notifier_mutex);
                 nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
                 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
                 nvgpu_mutex_destroy(&c->sync_lock);
@@ -1339,14 +1339,10 @@ static bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g,
         if (!refch)
                 return verbose;
 
-        nvgpu_mutex_acquire(&refch->error_notifier_mutex);
-        if (refch->error_notifier_ref) {
-                u32 err = refch->error_notifier->info32;
-
-                if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
-                        verbose = refch->timeout_debug_dump;
-        }
-        nvgpu_mutex_release(&refch->error_notifier_mutex);
+        if (nvgpu_is_error_notifier_set(refch,
+                        NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT))
+                verbose = refch->timeout_debug_dump;
+
         return verbose;
 }
 
@@ -1400,8 +1396,8 @@ void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
 {
         nvgpu_err(g,
                 "channel %d generated a mmu fault", refch->chid);
-        gk20a_set_error_notifier(refch,
-                NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
+        nvgpu_set_error_notifier(refch,
+                NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
 }
 
 void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
@@ -1939,7 +1935,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
 
                 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
                         if (gk20a_channel_get(ch_tsg)) {
-                                gk20a_set_error_notifier(ch_tsg, err_code);
+                                nvgpu_set_error_notifier(ch_tsg, err_code);
                                 gk20a_channel_put(ch_tsg);
                         }
                 }
@@ -1947,7 +1943,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
                 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
                 gk20a_fifo_recover_tsg(g, ch->tsgid, verbose);
         } else {
-                gk20a_set_error_notifier(ch, err_code);
+                nvgpu_set_error_notifier(ch, err_code);
                 gk20a_fifo_recover_ch(g, ch->chid, verbose);
         }
 
@@ -2108,8 +2104,8 @@ static bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
                 *verbose = ch->timeout_debug_dump;
                 *ms = ch->timeout_accumulated_ms;
                 if (recover)
-                        gk20a_set_error_notifier(ch,
-                                NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+                        nvgpu_set_error_notifier(ch,
+                                NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 
                 gk20a_channel_put(ch);
         }
@@ -2170,8 +2166,8 @@ bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
                 gk20a_channel_put(ch);
                 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
                         if (gk20a_channel_get(ch)) {
-                                gk20a_set_error_notifier(ch,
-                                        NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+                                nvgpu_set_error_notifier(ch,
+                                        NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
                                 *verbose |= ch->timeout_debug_dump;
                                 gk20a_channel_put(ch);
                         }
@@ -2413,7 +2409,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
                         rc_type = RC_TYPE_PBDMA_FAULT;
                         nvgpu_err(g,
                                 "semaphore acquire timeout!");
-                        *error_notifier = NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
+                        *error_notifier = NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT;
                 }
                 *handled |= pbdma_intr_0_acquire_pending_f();
         }
@@ -2431,7 +2427,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
 
         if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) {
                 *error_notifier =
-                        NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
+                        NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH;
                 rc_type = RC_TYPE_PBDMA_FAULT;
         }
 
@@ -2485,7 +2481,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
                 struct channel_gk20a *ch = &f->channel[id];
 
                 if (gk20a_channel_get(ch)) {
-                        gk20a_set_error_notifier(ch, error_notifier);
+                        nvgpu_set_error_notifier(ch, error_notifier);
                         gk20a_fifo_recover_ch(g, id, true);
                         gk20a_channel_put(ch);
                 }
@@ -2497,7 +2493,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
                 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
                 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
                         if (gk20a_channel_get(ch)) {
-                                gk20a_set_error_notifier(ch,
+                                nvgpu_set_error_notifier(ch,
                                         error_notifier);
                                 gk20a_channel_put(ch);
                         }
@@ -2514,7 +2510,7 @@ u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
         u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
 
         u32 handled = 0;
-        u32 error_notifier = NVGPU_CHANNEL_PBDMA_ERROR;
+        u32 error_notifier = NVGPU_ERR_NOTIFIER_PBDMA_ERROR;
         unsigned int rc_type = RC_TYPE_NO_RC;
 
         if (pbdma_intr_0) {
@@ -2658,8 +2654,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
                 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
                         if (!gk20a_channel_get(ch))
                                 continue;
-                        gk20a_set_error_notifier(ch,
-                                NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+                        nvgpu_set_error_notifier(ch,
+                                NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
                         gk20a_channel_put(ch);
                 }
                 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
@@ -2671,8 +2667,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
                         "preempt channel %d timeout", id);
 
                 if (gk20a_channel_get(ch)) {
-                        gk20a_set_error_notifier(ch,
-                                NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+                        nvgpu_set_error_notifier(ch,
+                                NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
                         gk20a_fifo_recover_ch(g, id, true);
                         gk20a_channel_put(ch);
                 }
@@ -40,6 +40,7 @@
 #include <nvgpu/barrier.h>
 #include <nvgpu/mm.h>
 #include <nvgpu/ctxsw_trace.h>
+#include <nvgpu/error_notifier.h>
 
 #include "gk20a.h"
 #include "gr_ctx_gk20a.h"
@@ -5113,14 +5114,14 @@ static void gk20a_gr_set_error_notifier(struct gk20a *g,
                         nvgpu_rwsem_down_read(&tsg->ch_list_lock);
                         list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
                                 if (gk20a_channel_get(ch_tsg)) {
-                                        gk20a_set_error_notifier(ch_tsg,
+                                        nvgpu_set_error_notifier(ch_tsg,
                                                         error_notifier);
                                         gk20a_channel_put(ch_tsg);
                                 }
                         }
                         nvgpu_rwsem_up_read(&tsg->ch_list_lock);
                 } else {
-                        gk20a_set_error_notifier(ch, error_notifier);
+                        nvgpu_set_error_notifier(ch, error_notifier);
                 }
         }
 }
@@ -5130,7 +5131,7 @@ static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
 {
         gk20a_dbg_fn("");
         gk20a_gr_set_error_notifier(g, isr_data,
-                        NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
+                        NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
         nvgpu_err(g,
                 "gr semaphore timeout");
         return -EINVAL;
@@ -5141,7 +5142,7 @@ static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g,
 {
         gk20a_dbg_fn("");
         gk20a_gr_set_error_notifier(g, isr_data,
-                        NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
+                        NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
         /* This is an unrecoverable error, reset is needed */
         nvgpu_err(g,
                 "gr semaphore timeout");
@@ -5156,7 +5157,7 @@ static int gk20a_gr_handle_illegal_method(struct gk20a *g,
                         isr_data->data_lo);
         if (ret) {
                 gk20a_gr_set_error_notifier(g, isr_data,
-                        NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
+                        NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
                 nvgpu_err(g, "invalid method class 0x%08x"
                         ", offset 0x%08x address 0x%08x",
                         isr_data->class_num, isr_data->offset, isr_data->addr);
@@ -5169,7 +5170,7 @@ static int gk20a_gr_handle_illegal_class(struct gk20a *g,
 {
         gk20a_dbg_fn("");
         gk20a_gr_set_error_notifier(g, isr_data,
-                        NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+                        NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
         nvgpu_err(g,
                 "invalid class 0x%08x, offset 0x%08x",
                 isr_data->class_num, isr_data->offset);
@@ -5193,7 +5194,7 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
 
         if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) {
                 gk20a_gr_set_error_notifier(g, isr_data,
-                        NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD);
+                        NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD);
                 nvgpu_err(g,
                         "firmware method error 0x%08x for offset 0x%04x",
                         gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)),
@@ -5215,7 +5216,7 @@ static int gk20a_gr_handle_class_error(struct gk20a *g,
         gr_class_error =
                 gr_class_error_code_v(gk20a_readl(g, gr_class_error_r()));
         gk20a_gr_set_error_notifier(g, isr_data,
-                        NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+                        NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
         nvgpu_err(g, "class error 0x%08x, offset 0x%08x,"
                 "sub channel 0x%08x mme generated %d,"
                 " mme pc 0x%08xdata high %d priv status %d"
@@ -5244,7 +5245,7 @@ static int gk20a_gr_handle_firmware_method(struct gk20a *g,
         gk20a_dbg_fn("");
 
         gk20a_gr_set_error_notifier(g, isr_data,
-                        NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+                        NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
         nvgpu_err(g,
                 "firmware method 0x%08x, offset 0x%08x for channel %u",
                 isr_data->class_num, isr_data->offset,
@@ -6024,7 +6025,7 @@ int gk20a_gr_isr(struct gk20a *g)
                 if (need_reset) {
                         nvgpu_err(g, "set gr exception notifier");
                         gk20a_gr_set_error_notifier(g, &isr_data,
-                                        NVGPU_CHANNEL_GR_EXCEPTION);
+                                        NVGPU_ERR_NOTIFIER_GR_EXCEPTION);
                 }
         }
 
drivers/gpu/nvgpu/include/nvgpu/error_notifier.h (new file, 49 lines)
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVGPU_ERROR_NOTIFIER_H__
+#define __NVGPU_ERROR_NOTIFIER_H__
+
+#include <nvgpu/types.h>
+
+struct channel_gk20a;
+
+enum {
+        NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT = 0,
+        NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD,
+        NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY,
+        NVGPU_ERR_NOTIFIER_GR_EXCEPTION,
+        NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT,
+        NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY,
+        NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT,
+        NVGPU_ERR_NOTIFIER_PBDMA_ERROR,
+        NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD,
+        NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR,
+        NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH,
+};
+
+void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error);
+void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error);
+void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error);
+bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier);
+
+#endif /* __NVGPU_ERROR_NOTIFIER_H__ */
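For orientation, a minimal usage sketch of the new interface from chip-agnostic code; example_report_mmu_fault() is a hypothetical caller, not part of the commit, while the API and the enum value come from the header above:

    #include <nvgpu/error_notifier.h>

    /* Report an MMU fault to user space without clobbering an earlier error.
     * The Linux implementation translates the code to the NVGPU_CHANNEL_*
     * value and fills the nvgpu_notification buffer, if one was registered.
     */
    static void example_report_mmu_fault(struct channel_gk20a *ch)
    {
            nvgpu_set_error_notifier_if_empty(ch,
                            NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
    }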