gpu: nvgpu: define error_notifiers in common code

All the Linux-specific error_notifier codes are defined in the Linux-specific
header file <uapi/linux/nvgpu.h> and are used throughout the common driver code

But since they are defined in a Linux-specific file, we need to move all
uses of those error_notifiers into Linux-specific code only

Hence define new error_notifiers in include/nvgpu/error_notifier.h and
use them in the common code

Add new API nvgpu_error_notifier_to_channel_notifier() to convert common
error_notifier of the form NVGPU_ERR_NOTIFIER_* to linux specific error
notifier of the form NVGPU_CHANNEL_*

Any future addition to the error notifiers requires an update to both forms
of the error notifiers

Move all error notifier related metadata from channel_gk20a (common code)
to linux specific structure nvgpu_channel_linux
Update all accesses to this data from new structure instead of channel_gk20a

Move the APIs below to a Linux-specific file, rename them as listed, and
declare them in error_notifier.h
nvgpu_set_error_notifier_locked()
nvgpu_set_error_notifier()
nvgpu_is_error_notifier_set()

Add below new API and use it in fifo_vgpu.c
nvgpu_set_error_notifier_if_empty()

Include <nvgpu/error_notifier.h> wherever new error_notifier codes are used

NVGPU-426

Change-Id: Iaa5bfc150e6e9ec17d797d445c2d6407afe9f4bd
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1593361
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2017-11-06 05:44:23 -08:00
committed by mobile promotions
parent a0cea295e7
commit c6b9177cff
10 changed files with 285 additions and 138 deletions

View File

@@ -17,6 +17,7 @@
#include <nvgpu/enabled.h>
#include <nvgpu/debug.h>
#include <nvgpu/ltc.h>
#include <nvgpu/error_notifier.h>
/*
* This is required for nvgpu_vm_find_buf() which is used in the tracing
@@ -37,6 +38,124 @@
#include <trace/events/gk20a.h>
#include <uapi/linux/nvgpu.h>
/*
 * Translate a common-code error notifier (NVGPU_ERR_NOTIFIER_*) into the
 * Linux-specific notifier value exposed to user space (NVGPU_CHANNEL_*).
 *
 * A value with no known mapping is reported with pr_warn() and handed back
 * to the caller unchanged.
 */
static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
{
	u32 channel_notifier;

	switch (error_notifier) {
	case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
		channel_notifier = NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
		break;
	case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD:
		channel_notifier = NVGPU_CHANNEL_GR_ERROR_SW_METHOD;
		break;
	case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
		channel_notifier = NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
		break;
	case NVGPU_ERR_NOTIFIER_GR_EXCEPTION:
		channel_notifier = NVGPU_CHANNEL_GR_EXCEPTION;
		break;
	case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT:
		channel_notifier = NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
		break;
	case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY:
		channel_notifier = NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY;
		break;
	case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT:
		channel_notifier = NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT;
		break;
	case NVGPU_ERR_NOTIFIER_PBDMA_ERROR:
		channel_notifier = NVGPU_CHANNEL_PBDMA_ERROR;
		break;
	case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD:
		channel_notifier = NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD;
		break;
	case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR:
		channel_notifier = NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR;
		break;
	case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH:
		channel_notifier = NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
		break;
	default:
		/* Unknown code: warn and pass the input through untouched */
		pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier);
		channel_notifier = error_notifier;
		break;
	}

	return channel_notifier;
}
/**
 * nvgpu_set_error_notifier_locked()
 * Should be called with the channel's error notifier mutex held, i.e. the
 * error_notifier.mutex member of the struct nvgpu_channel_linux reachable
 * through ch->os_priv
 *
 * error should be of the form NVGPU_ERR_NOTIFIER_*
 *
 * The code is first converted to its NVGPU_CHANNEL_* form. If an error
 * notifier buffer is attached (error_notifier.dmabuf is non-NULL), the
 * current time, the converted code (info32) and a status of 0xffff are
 * written to the notification and the event is logged. Without a buffer
 * nothing is written.
 */
void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error)
{
	struct nvgpu_channel_linux *priv = ch->os_priv;
	struct nvgpu_notification *notification;
	struct timespec time_data;
	u64 nsec;

	error = nvgpu_error_notifier_to_channel_notifier(error);

	if (!priv->error_notifier.dmabuf)
		return;

	notification = priv->error_notifier.notification;

	getnstimeofday(&time_data);
	nsec = ((u64)time_data.tv_sec) * 1000000000u +
		(u64)time_data.tv_nsec;

	/* 64-bit nanosecond timestamp is stored as low word, then high word */
	notification->time_stamp.nanoseconds[0] = (u32)nsec;
	notification->time_stamp.nanoseconds[1] = (u32)(nsec >> 32);
	notification->info32 = error;
	notification->status = 0xffff;

	/* error is a u32, so it is printed with an unsigned conversion */
	nvgpu_err(ch->g,
		"error notifier set to %u for ch %d", error, ch->chid);
}
/*
 * Locking wrapper around nvgpu_set_error_notifier_locked(): takes the
 * channel's error notifier mutex for the duration of the update.
 *
 * error should be of the form NVGPU_ERR_NOTIFIER_*
 */
void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error)
{
	struct nvgpu_channel_linux *os_ch = ch->os_priv;
	struct nvgpu_mutex *notifier_lock = &os_ch->error_notifier.mutex;

	nvgpu_mutex_acquire(notifier_lock);
	nvgpu_set_error_notifier_locked(ch, error);
	nvgpu_mutex_release(notifier_lock);
}
/*
 * Set the error notifier only when no error has been recorded yet.
 *
 * Under the error notifier mutex: nothing is written when no notifier
 * buffer is attached, or when the notification status already reads
 * 0xffff; otherwise nvgpu_set_error_notifier_locked() is called with
 * error.
 */
void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error)
{
	struct nvgpu_channel_linux *os_ch = ch->os_priv;
	struct nvgpu_notification *notif;

	nvgpu_mutex_acquire(&os_ch->error_notifier.mutex);

	if (!os_ch->error_notifier.dmabuf)
		goto unlock;

	notif = os_ch->error_notifier.notification;

	/* Don't overwrite error flag if it is already set */
	if (notif->status == 0xffff)
		goto unlock;

	nvgpu_set_error_notifier_locked(ch, error);

unlock:
	nvgpu_mutex_release(&os_ch->error_notifier.mutex);
}
/*
 * Report whether the channel's notification currently holds the given error.
 *
 * error_notifier should be of the form NVGPU_ERR_NOTIFIER_*; it is converted
 * to its NVGPU_CHANNEL_* form and compared against the info32 field of the
 * notification while the error notifier mutex is held. Returns false when no
 * notifier buffer is attached.
 */
bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier)
{
	struct nvgpu_channel_linux *os_ch = ch->os_priv;
	u32 wanted = nvgpu_error_notifier_to_channel_notifier(error_notifier);
	bool match = false;

	nvgpu_mutex_acquire(&os_ch->error_notifier.mutex);

	if (os_ch->error_notifier.dmabuf) {
		u32 recorded = os_ch->error_notifier.notification->info32;

		match = (recorded == wanted);
	}

	nvgpu_mutex_release(&os_ch->error_notifier.mutex);

	return match;
}
static void gk20a_channel_update_runcb_fn(struct work_struct *work)
{
struct nvgpu_channel_completion_cb *completion_cb =
@@ -128,6 +247,7 @@ static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
{
struct nvgpu_channel_linux *priv;
int err;
priv = nvgpu_kzalloc(g, sizeof(*priv));
if (!priv)
@@ -136,6 +256,12 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
ch->os_priv = priv;
priv->ch = ch;
err = nvgpu_mutex_init(&priv->error_notifier.mutex);
if (err) {
nvgpu_kfree(g, priv);
return err;
}
nvgpu_channel_work_completion_init(ch);
return 0;
@@ -143,7 +269,10 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
{
nvgpu_kfree(g, ch->os_priv);
struct nvgpu_channel_linux *priv = ch->os_priv;
nvgpu_mutex_destroy(&priv->error_notifier.mutex);
nvgpu_kfree(g, priv);
}
int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)

View File

@@ -42,10 +42,20 @@ struct nvgpu_channel_completion_cb {
struct work_struct work;
};
/*
 * Linux-side state of a channel's error notifier buffer.
 */
struct nvgpu_error_notifier {
/* dma_buf backing the notifier; NULL when no notifier is attached */
struct dma_buf *dmabuf;
/* base address of the dma_buf mapping, handed back to dma_buf_vunmap() */
void *vaddr;
/* notification record inside the mapping (vaddr plus the requested offset) */
struct nvgpu_notification *notification;
/* taken around accesses to dmabuf and to the notification contents */
struct nvgpu_mutex mutex;
};
/*
 * Linux-specific per-channel data, reached from common code through
 * ch->os_priv.
 */
struct nvgpu_channel_linux {
/* back-pointer to the common channel that owns this structure */
struct channel_gk20a *ch;
/* completion callback state for the channel */
struct nvgpu_channel_completion_cb completion_cb;
/* error notifier buffer state and the mutex guarding it */
struct nvgpu_error_notifier error_notifier;
};
int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);

View File

@@ -30,6 +30,7 @@
#include <nvgpu/list.h>
#include <nvgpu/debug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/error_notifier.h>
#include "gk20a/gk20a.h"
#include "gk20a/dbg_gpu_gk20a.h"
@@ -227,15 +228,17 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch)
{
nvgpu_mutex_acquire(&ch->error_notifier_mutex);
if (ch->error_notifier_ref) {
dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
dma_buf_put(ch->error_notifier_ref);
ch->error_notifier_ref = NULL;
ch->error_notifier = NULL;
ch->error_notifier_va = NULL;
struct nvgpu_channel_linux *priv = ch->os_priv;
nvgpu_mutex_acquire(&priv->error_notifier.mutex);
if (priv->error_notifier.dmabuf) {
dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr);
dma_buf_put(priv->error_notifier.dmabuf);
priv->error_notifier.dmabuf = NULL;
priv->error_notifier.notification = NULL;
priv->error_notifier.vaddr = NULL;
}
nvgpu_mutex_release(&ch->error_notifier_mutex);
nvgpu_mutex_release(&priv->error_notifier.mutex);
}
static int gk20a_init_error_notifier(struct channel_gk20a *ch,
@@ -244,6 +247,7 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
struct dma_buf *dmabuf;
void *va;
u64 end = args->offset + sizeof(struct nvgpu_notification);
struct nvgpu_channel_linux *priv = ch->os_priv;
if (!args->mem) {
pr_err("gk20a_init_error_notifier: invalid memory handle\n");
@@ -273,14 +277,15 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
return -ENOMEM;
}
ch->error_notifier = va + args->offset;
ch->error_notifier_va = va;
memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
priv->error_notifier.notification = va + args->offset;
priv->error_notifier.vaddr = va;
memset(priv->error_notifier.notification, 0,
sizeof(struct nvgpu_notification));
/* set channel notifiers pointer */
nvgpu_mutex_acquire(&ch->error_notifier_mutex);
ch->error_notifier_ref = dmabuf;
nvgpu_mutex_release(&ch->error_notifier_mutex);
nvgpu_mutex_acquire(&priv->error_notifier.mutex);
priv->error_notifier.dmabuf = dmabuf;
nvgpu_mutex_release(&priv->error_notifier.mutex);
return 0;
}
@@ -1361,7 +1366,7 @@ long gk20a_channel_ioctl(struct file *filp,
break;
}
err = ch->g->ops.fifo.force_reset_ch(ch,
NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR, true);
NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true);
gk20a_idle(ch->g);
break;
case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL:

View File

@@ -25,10 +25,13 @@
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/barrier.h>
#include <nvgpu/error_notifier.h>
#include "vgpu.h"
#include "fifo_vgpu.h"
#include "common/linux/channel.h"
#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
@@ -691,7 +694,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
if (gk20a_channel_get(ch_tsg)) {
gk20a_set_error_notifier(ch_tsg, err_code);
nvgpu_set_error_notifier(ch_tsg, err_code);
ch_tsg->has_timedout = true;
gk20a_channel_put(ch_tsg);
}
@@ -699,7 +702,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
} else {
gk20a_set_error_notifier(ch, err_code);
nvgpu_set_error_notifier(ch, err_code);
ch->has_timedout = true;
}
@@ -716,19 +719,14 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
struct channel_gk20a *ch)
{
nvgpu_mutex_acquire(&ch->error_notifier_mutex);
if (ch->error_notifier_ref) {
if (ch->error_notifier->status == 0xffff) {
/* If error code is already set, this mmu fault
* was triggered as part of recovery from other
* error condition.
* Don't overwrite error flag. */
} else {
gk20a_set_error_notifier_locked(ch,
NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
}
}
nvgpu_mutex_release(&ch->error_notifier_mutex);
/*
* If error code is already set, this mmu fault
* was triggered as part of recovery from other
* error condition.
* Don't overwrite error flag.
*/
nvgpu_set_error_notifier_if_empty(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
/* mark channel as faulted */
ch->has_timedout = true;
@@ -778,11 +776,11 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
switch (info->type) {
case TEGRA_VGPU_FIFO_INTR_PBDMA:
gk20a_set_error_notifier(ch, NVGPU_CHANNEL_PBDMA_ERROR);
nvgpu_set_error_notifier(ch, NVGPU_ERR_NOTIFIER_PBDMA_ERROR);
break;
case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT:
gk20a_set_error_notifier(ch,
NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
nvgpu_set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
break;
case TEGRA_VGPU_FIFO_INTR_MMU_FAULT:
vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch);

View File

@@ -20,6 +20,7 @@
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/error_notifier.h>
#include "vgpu.h"
#include "gr_vgpu.h"
@@ -941,31 +942,31 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
break;
case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
gk20a_set_error_notifier(ch,
NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
nvgpu_set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
break;
case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
gk20a_set_error_notifier(ch,
NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
nvgpu_set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
break;
case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
gk20a_set_error_notifier(ch,
NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
nvgpu_set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
break;
case TEGRA_VGPU_GR_INTR_FECS_ERROR:
break;
case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
gk20a_set_error_notifier(ch,
NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
nvgpu_set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
break;
case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
gk20a_set_error_notifier(ch,
NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
nvgpu_set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
break;
case TEGRA_VGPU_GR_INTR_EXCEPTION:
gk20a_set_error_notifier(ch,
NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
nvgpu_set_error_notifier(ch,
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
break;
case TEGRA_VGPU_GR_INTR_SM_EXCEPTION:
gk20a_dbg_gpu_post_events(ch);