mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 09:57:08 +03:00
gpu: nvgpu: define error_notifiers in common code
All the Linux-specific error_notifier codes are defined in the Linux-specific header file <uapi/linux/nvgpu.h> and are used throughout the common driver. Because they are defined in a Linux-specific file, all uses of those error_notifiers need to move into Linux-specific code only.
Hence, define new error_notifiers in include/nvgpu/error_notifier.h and use them in the common code.
Add a new API, nvgpu_error_notifier_to_channel_notifier(), to convert a common error_notifier of the form NVGPU_ERR_NOTIFIER_* to the Linux-specific error notifier of the form NVGPU_CHANNEL_*. Any future addition to the error notifiers requires an update to both forms of error notifier.
Move all error-notifier-related metadata from channel_gk20a (common code) to the Linux-specific structure nvgpu_channel_linux. Update all accesses to this data to use the new structure instead of channel_gk20a.
Move and rename the APIs below into a Linux-specific file and declare them in error_notifier.h: nvgpu_set_error_notifier_locked(), nvgpu_set_error_notifier(), nvgpu_is_error_notifier_set().
Add the new API below and use it in fifo_vgpu.c: nvgpu_set_error_notifier_if_empty().
Include <nvgpu/error_notifier.h> wherever the new error_notifier codes are used.
NVGPU-426
Change-Id: Iaa5bfc150e6e9ec17d797d445c2d6407afe9f4bd
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1593361
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
a0cea295e7
commit
c6b9177cff
@@ -17,6 +17,7 @@
|
||||
#include <nvgpu/enabled.h>
|
||||
#include <nvgpu/debug.h>
|
||||
#include <nvgpu/ltc.h>
|
||||
#include <nvgpu/error_notifier.h>
|
||||
|
||||
/*
|
||||
* This is required for nvgpu_vm_find_buf() which is used in the tracing
|
||||
@@ -37,6 +38,124 @@
|
||||
#include <trace/events/gk20a.h>
|
||||
#include <uapi/linux/nvgpu.h>
|
||||
|
||||
/*
|
||||
* API to convert error_notifiers in common code and of the form
|
||||
* NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user
|
||||
* space and of the form NVGPU_CHANNEL_*
|
||||
*/
|
||||
static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
|
||||
{
|
||||
switch (error_notifier) {
|
||||
case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
|
||||
return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
|
||||
case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD:
|
||||
return NVGPU_CHANNEL_GR_ERROR_SW_METHOD;
|
||||
case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
|
||||
return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
|
||||
case NVGPU_ERR_NOTIFIER_GR_EXCEPTION:
|
||||
return NVGPU_CHANNEL_GR_EXCEPTION;
|
||||
case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT:
|
||||
return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
|
||||
case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY:
|
||||
return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY;
|
||||
case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT:
|
||||
return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT;
|
||||
case NVGPU_ERR_NOTIFIER_PBDMA_ERROR:
|
||||
return NVGPU_CHANNEL_PBDMA_ERROR;
|
||||
case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD:
|
||||
return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD;
|
||||
case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR:
|
||||
return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR;
|
||||
case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH:
|
||||
return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
|
||||
}
|
||||
|
||||
pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier);
|
||||
|
||||
return error_notifier;
|
||||
}
|
||||
|
||||
/**
|
||||
* nvgpu_set_error_notifier_locked()
|
||||
* Should be called with ch->error_notifier_mutex held
|
||||
*
|
||||
* error should be of the form NVGPU_ERR_NOTIFIER_*
|
||||
*/
|
||||
void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error)
|
||||
{
|
||||
struct nvgpu_channel_linux *priv = ch->os_priv;
|
||||
|
||||
error = nvgpu_error_notifier_to_channel_notifier(error);
|
||||
|
||||
if (priv->error_notifier.dmabuf) {
|
||||
struct nvgpu_notification *notification =
|
||||
priv->error_notifier.notification;
|
||||
struct timespec time_data;
|
||||
u64 nsec;
|
||||
|
||||
getnstimeofday(&time_data);
|
||||
nsec = ((u64)time_data.tv_sec) * 1000000000u +
|
||||
(u64)time_data.tv_nsec;
|
||||
notification->time_stamp.nanoseconds[0] =
|
||||
(u32)nsec;
|
||||
notification->time_stamp.nanoseconds[1] =
|
||||
(u32)(nsec >> 32);
|
||||
notification->info32 = error;
|
||||
notification->status = 0xffff;
|
||||
|
||||
nvgpu_err(ch->g,
|
||||
"error notifier set to %d for ch %d", error, ch->chid);
|
||||
}
|
||||
}
|
||||
|
||||
/* error should be of the form NVGPU_ERR_NOTIFIER_* */
|
||||
void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error)
|
||||
{
|
||||
struct nvgpu_channel_linux *priv = ch->os_priv;
|
||||
|
||||
nvgpu_mutex_acquire(&priv->error_notifier.mutex);
|
||||
nvgpu_set_error_notifier_locked(ch, error);
|
||||
nvgpu_mutex_release(&priv->error_notifier.mutex);
|
||||
}
|
||||
|
||||
/*
 * Record an error in the channel's error notifier only when no error has
 * been recorded yet, i.e. when the notification status is not 0xffff.
 * Does nothing if no notifier buffer is installed.
 *
 * error is passed on unchanged to nvgpu_set_error_notifier_locked().
 */
void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error)
{
	struct nvgpu_channel_linux *priv = ch->os_priv;
	struct nvgpu_notification *notification;

	nvgpu_mutex_acquire(&priv->error_notifier.mutex);

	if (!priv->error_notifier.dmabuf)
		goto unlock;

	notification = priv->error_notifier.notification;

	/* Don't overwrite error flag if it is already set */
	if (notification->status == 0xffff)
		goto unlock;

	nvgpu_set_error_notifier_locked(ch, error);

unlock:
	nvgpu_mutex_release(&priv->error_notifier.mutex);
}
|
||||
|
||||
/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */
|
||||
bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier)
|
||||
{
|
||||
struct nvgpu_channel_linux *priv = ch->os_priv;
|
||||
bool notifier_set = false;
|
||||
|
||||
error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier);
|
||||
|
||||
nvgpu_mutex_acquire(&priv->error_notifier.mutex);
|
||||
if (priv->error_notifier.dmabuf) {
|
||||
struct nvgpu_notification *notification =
|
||||
priv->error_notifier.notification;
|
||||
u32 err = notification->info32;
|
||||
|
||||
if (err == error_notifier)
|
||||
notifier_set = true;
|
||||
}
|
||||
nvgpu_mutex_release(&priv->error_notifier.mutex);
|
||||
|
||||
return notifier_set;
|
||||
}
|
||||
|
||||
static void gk20a_channel_update_runcb_fn(struct work_struct *work)
|
||||
{
|
||||
struct nvgpu_channel_completion_cb *completion_cb =
|
||||
@@ -128,6 +247,7 @@ static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
|
||||
static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
|
||||
{
|
||||
struct nvgpu_channel_linux *priv;
|
||||
int err;
|
||||
|
||||
priv = nvgpu_kzalloc(g, sizeof(*priv));
|
||||
if (!priv)
|
||||
@@ -136,6 +256,12 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
|
||||
ch->os_priv = priv;
|
||||
priv->ch = ch;
|
||||
|
||||
err = nvgpu_mutex_init(&priv->error_notifier.mutex);
|
||||
if (err) {
|
||||
nvgpu_kfree(g, priv);
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_channel_work_completion_init(ch);
|
||||
|
||||
return 0;
|
||||
@@ -143,7 +269,10 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
|
||||
|
||||
static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
|
||||
{
|
||||
nvgpu_kfree(g, ch->os_priv);
|
||||
struct nvgpu_channel_linux *priv = ch->os_priv;
|
||||
|
||||
nvgpu_mutex_destroy(&priv->error_notifier.mutex);
|
||||
nvgpu_kfree(g, priv);
|
||||
}
|
||||
|
||||
int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
|
||||
|
||||
@@ -42,10 +42,20 @@ struct nvgpu_channel_completion_cb {
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
struct nvgpu_error_notifier {
|
||||
struct dma_buf *dmabuf;
|
||||
void *vaddr;
|
||||
|
||||
struct nvgpu_notification *notification;
|
||||
|
||||
struct nvgpu_mutex mutex;
|
||||
};
|
||||
|
||||
struct nvgpu_channel_linux {
|
||||
struct channel_gk20a *ch;
|
||||
|
||||
struct nvgpu_channel_completion_cb completion_cb;
|
||||
struct nvgpu_error_notifier error_notifier;
|
||||
};
|
||||
|
||||
int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <nvgpu/list.h>
|
||||
#include <nvgpu/debug.h>
|
||||
#include <nvgpu/enabled.h>
|
||||
#include <nvgpu/error_notifier.h>
|
||||
|
||||
#include "gk20a/gk20a.h"
|
||||
#include "gk20a/dbg_gpu_gk20a.h"
|
||||
@@ -227,15 +228,17 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
|
||||
|
||||
static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch)
|
||||
{
|
||||
nvgpu_mutex_acquire(&ch->error_notifier_mutex);
|
||||
if (ch->error_notifier_ref) {
|
||||
dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
|
||||
dma_buf_put(ch->error_notifier_ref);
|
||||
ch->error_notifier_ref = NULL;
|
||||
ch->error_notifier = NULL;
|
||||
ch->error_notifier_va = NULL;
|
||||
struct nvgpu_channel_linux *priv = ch->os_priv;
|
||||
|
||||
nvgpu_mutex_acquire(&priv->error_notifier.mutex);
|
||||
if (priv->error_notifier.dmabuf) {
|
||||
dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr);
|
||||
dma_buf_put(priv->error_notifier.dmabuf);
|
||||
priv->error_notifier.dmabuf = NULL;
|
||||
priv->error_notifier.notification = NULL;
|
||||
priv->error_notifier.vaddr = NULL;
|
||||
}
|
||||
nvgpu_mutex_release(&ch->error_notifier_mutex);
|
||||
nvgpu_mutex_release(&priv->error_notifier.mutex);
|
||||
}
|
||||
|
||||
static int gk20a_init_error_notifier(struct channel_gk20a *ch,
|
||||
@@ -244,6 +247,7 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
|
||||
struct dma_buf *dmabuf;
|
||||
void *va;
|
||||
u64 end = args->offset + sizeof(struct nvgpu_notification);
|
||||
struct nvgpu_channel_linux *priv = ch->os_priv;
|
||||
|
||||
if (!args->mem) {
|
||||
pr_err("gk20a_init_error_notifier: invalid memory handle\n");
|
||||
@@ -273,14 +277,15 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ch->error_notifier = va + args->offset;
|
||||
ch->error_notifier_va = va;
|
||||
memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
|
||||
priv->error_notifier.notification = va + args->offset;
|
||||
priv->error_notifier.vaddr = va;
|
||||
memset(priv->error_notifier.notification, 0,
|
||||
sizeof(struct nvgpu_notification));
|
||||
|
||||
/* set channel notifiers pointer */
|
||||
nvgpu_mutex_acquire(&ch->error_notifier_mutex);
|
||||
ch->error_notifier_ref = dmabuf;
|
||||
nvgpu_mutex_release(&ch->error_notifier_mutex);
|
||||
nvgpu_mutex_acquire(&priv->error_notifier.mutex);
|
||||
priv->error_notifier.dmabuf = dmabuf;
|
||||
nvgpu_mutex_release(&priv->error_notifier.mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1361,7 +1366,7 @@ long gk20a_channel_ioctl(struct file *filp,
|
||||
break;
|
||||
}
|
||||
err = ch->g->ops.fifo.force_reset_ch(ch,
|
||||
NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR, true);
|
||||
NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true);
|
||||
gk20a_idle(ch->g);
|
||||
break;
|
||||
case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL:
|
||||
|
||||
@@ -25,10 +25,13 @@
|
||||
#include <nvgpu/atomic.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/barrier.h>
|
||||
#include <nvgpu/error_notifier.h>
|
||||
|
||||
#include "vgpu.h"
|
||||
#include "fifo_vgpu.h"
|
||||
|
||||
#include "common/linux/channel.h"
|
||||
|
||||
#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
|
||||
|
||||
@@ -691,7 +694,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
|
||||
|
||||
list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
|
||||
if (gk20a_channel_get(ch_tsg)) {
|
||||
gk20a_set_error_notifier(ch_tsg, err_code);
|
||||
nvgpu_set_error_notifier(ch_tsg, err_code);
|
||||
ch_tsg->has_timedout = true;
|
||||
gk20a_channel_put(ch_tsg);
|
||||
}
|
||||
@@ -699,7 +702,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
|
||||
|
||||
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
|
||||
} else {
|
||||
gk20a_set_error_notifier(ch, err_code);
|
||||
nvgpu_set_error_notifier(ch, err_code);
|
||||
ch->has_timedout = true;
|
||||
}
|
||||
|
||||
@@ -716,19 +719,14 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
|
||||
static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
|
||||
struct channel_gk20a *ch)
|
||||
{
|
||||
nvgpu_mutex_acquire(&ch->error_notifier_mutex);
|
||||
if (ch->error_notifier_ref) {
|
||||
if (ch->error_notifier->status == 0xffff) {
|
||||
/* If error code is already set, this mmu fault
|
||||
* was triggered as part of recovery from other
|
||||
* error condition.
|
||||
* Don't overwrite error flag. */
|
||||
} else {
|
||||
gk20a_set_error_notifier_locked(ch,
|
||||
NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
|
||||
}
|
||||
}
|
||||
nvgpu_mutex_release(&ch->error_notifier_mutex);
|
||||
/*
|
||||
* If error code is already set, this mmu fault
|
||||
* was triggered as part of recovery from other
|
||||
* error condition.
|
||||
* Don't overwrite error flag.
|
||||
*/
|
||||
nvgpu_set_error_notifier_if_empty(ch,
|
||||
NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
|
||||
|
||||
/* mark channel as faulted */
|
||||
ch->has_timedout = true;
|
||||
@@ -778,11 +776,11 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
|
||||
|
||||
switch (info->type) {
|
||||
case TEGRA_VGPU_FIFO_INTR_PBDMA:
|
||||
gk20a_set_error_notifier(ch, NVGPU_CHANNEL_PBDMA_ERROR);
|
||||
nvgpu_set_error_notifier(ch, NVGPU_ERR_NOTIFIER_PBDMA_ERROR);
|
||||
break;
|
||||
case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT:
|
||||
gk20a_set_error_notifier(ch,
|
||||
NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
|
||||
nvgpu_set_error_notifier(ch,
|
||||
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
|
||||
break;
|
||||
case TEGRA_VGPU_FIFO_INTR_MMU_FAULT:
|
||||
vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch);
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/error_notifier.h>
|
||||
|
||||
#include "vgpu.h"
|
||||
#include "gr_vgpu.h"
|
||||
@@ -941,31 +942,31 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
|
||||
nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
|
||||
break;
|
||||
case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
|
||||
gk20a_set_error_notifier(ch,
|
||||
NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
|
||||
nvgpu_set_error_notifier(ch,
|
||||
NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
|
||||
break;
|
||||
case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
|
||||
gk20a_set_error_notifier(ch,
|
||||
NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
|
||||
nvgpu_set_error_notifier(ch,
|
||||
NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
|
||||
case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
|
||||
break;
|
||||
case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
|
||||
gk20a_set_error_notifier(ch,
|
||||
NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
|
||||
nvgpu_set_error_notifier(ch,
|
||||
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
|
||||
break;
|
||||
case TEGRA_VGPU_GR_INTR_FECS_ERROR:
|
||||
break;
|
||||
case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
|
||||
gk20a_set_error_notifier(ch,
|
||||
NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
|
||||
nvgpu_set_error_notifier(ch,
|
||||
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
|
||||
break;
|
||||
case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
|
||||
gk20a_set_error_notifier(ch,
|
||||
NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
|
||||
nvgpu_set_error_notifier(ch,
|
||||
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
|
||||
break;
|
||||
case TEGRA_VGPU_GR_INTR_EXCEPTION:
|
||||
gk20a_set_error_notifier(ch,
|
||||
NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
|
||||
nvgpu_set_error_notifier(ch,
|
||||
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
|
||||
break;
|
||||
case TEGRA_VGPU_GR_INTR_SM_EXCEPTION:
|
||||
gk20a_dbg_gpu_post_events(ch);
|
||||
|
||||
Reference in New Issue
Block a user