gpu: nvgpu: define error_notifiers in common code

All the Linux specific error_notifier codes are defined in the Linux specific
header file <uapi/linux/nvgpu.h>, but they are used throughout the common
driver code.

Since they are defined in a Linux specific file, all uses of these
error_notifiers need to be restricted to Linux specific code.

Hence, define new error_notifiers in include/nvgpu/error_notifier.h and use
them in the common code.
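
As an illustrative sketch (not the verbatim header), the common codes could
look like below; only the codes touched by this change are listed, and the
numeric values are assumptions:

  /* include/nvgpu/error_notifier.h -- illustrative sketch */
  #define NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT		0
  #define NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY			1
  #define NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT		2
  #define NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY			3
  #define NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT		4
  #define NVGPU_ERR_NOTIFIER_PBDMA_ERROR			5
  #define NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD	6
  #define NVGPU_ERR_NOTIFIER_GR_EXCEPTION			7
  #define NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH	8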

Add a new API nvgpu_error_notifier_to_channel_notifier() to convert a common
error_notifier of the form NVGPU_ERR_NOTIFIER_* into the Linux specific error
notifier of the form NVGPU_CHANNEL_*.
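
A minimal sketch of the conversion helper; the real mapping has one case per
code, and the fallback behaviour shown here is an assumption:

  u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
  {
  	switch (error_notifier) {
  	case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
  		return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
  	case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
  		return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
  	/* ... one case per NVGPU_ERR_NOTIFIER_* code ... */
  	}

  	pr_warn("invalid error notifier requested %u\n", error_notifier);
  	return error_notifier;
  }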

Any future addition of an error notifier requires updating both forms of the
error notifiers.

Move all error notifier related metadata from channel_gk20a (common code) to
the Linux specific structure nvgpu_channel_linux, and update all accesses to
this data to go through the new structure instead of channel_gk20a.
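
The moved fields are exactly the ones deleted from channel_gk20a in the diff
below; how they sit inside the Linux-only wrapper is sketched here, with the
back pointer to the common channel being an assumption:

  struct nvgpu_channel_linux {
  	struct channel_gk20a *ch;

  	struct dma_buf *error_notifier_ref;
  	struct nvgpu_notification *error_notifier;
  	void *error_notifier_va;
  	struct nvgpu_mutex error_notifier_mutex;
  };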

Move and rename the below APIs to a Linux specific file and declare them in
error_notifier.h (a sketch of the renamed wrapper follows this list):
  nvgpu_set_error_notifier_locked()
  nvgpu_set_error_notifier()
  nvgpu_is_error_notifier_set()
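
For example, nvgpu_set_error_notifier() can stay a thin wrapper around the
locked variant, just as the removed gk20a_set_error_notifier() was; the
os_priv hop used here to reach the Linux-only data is an assumption:

  void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error)
  {
  	struct nvgpu_channel_linux *priv = ch->os_priv;

  	nvgpu_mutex_acquire(&priv->error_notifier_mutex);
  	/* the locked variant converts the NVGPU_ERR_NOTIFIER_* code to the
  	 * NVGPU_CHANNEL_* value before writing the notifier buffer */
  	nvgpu_set_error_notifier_locked(ch, error);
  	nvgpu_mutex_release(&priv->error_notifier_mutex);
  }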

Add the below new API and use it in fifo_vgpu.c (sketched below):
  nvgpu_set_error_notifier_if_empty()
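
A sketch of the if-empty variant, reusing the status convention visible in the
removed code below (status == 0xffff marks a notifier as already set); the
exact emptiness check is an assumption:

  void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error)
  {
  	struct nvgpu_channel_linux *priv = ch->os_priv;

  	nvgpu_mutex_acquire(&priv->error_notifier_mutex);
  	if (priv->error_notifier_ref) {
  		/* only write if no error notifier is pending */
  		if (priv->error_notifier->status != 0xffff)
  			nvgpu_set_error_notifier_locked(ch, error);
  	}
  	nvgpu_mutex_release(&priv->error_notifier_mutex);
  }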

Include <nvgpu/error_notifier.h> wherever the new error_notifier codes are
used.

NVGPU-426

Change-Id: Iaa5bfc150e6e9ec17d797d445c2d6407afe9f4bd
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1593361
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>

--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c

@@ -44,6 +44,7 @@
 #include <nvgpu/ltc.h>
 #include <nvgpu/barrier.h>
 #include <nvgpu/ctxsw_trace.h>
+#include <nvgpu/error_notifier.h>

 #include "gk20a.h"
 #include "dbg_gpu_gk20a.h"
@@ -339,37 +340,6 @@ int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
 	return ret ? ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true);
 }

-/**
- * gk20a_set_error_notifier_locked()
- * Should be called with ch->error_notifier_mutex held
- */
-void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error)
-{
-	if (ch->error_notifier_ref) {
-		struct timespec time_data;
-		u64 nsec;
-
-		getnstimeofday(&time_data);
-		nsec = ((u64)time_data.tv_sec) * 1000000000u +
-				(u64)time_data.tv_nsec;
-		ch->error_notifier->time_stamp.nanoseconds[0] =
-				(u32)nsec;
-		ch->error_notifier->time_stamp.nanoseconds[1] =
-				(u32)(nsec >> 32);
-		ch->error_notifier->info32 = error;
-		ch->error_notifier->status = 0xffff;
-
-		nvgpu_err(ch->g,
-			"error notifier set to %d for ch %d", error, ch->chid);
-	}
-}
-
-void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
-{
-	nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-	gk20a_set_error_notifier_locked(ch, error);
-	nvgpu_mutex_release(&ch->error_notifier_mutex);
-}
-
 static void gk20a_wait_until_counter_is_N(
 	struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
 	struct nvgpu_cond *c, const char *caller, const char *counter_name)
@@ -1550,7 +1520,7 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 		gk20a_gr_debug_dump(g);

 	g->ops.fifo.force_reset_ch(ch,
-		NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
+		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, true);
 }

 /**
@@ -2210,53 +2180,48 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 	err = nvgpu_mutex_init(&c->ioctl_lock);
 	if (err)
 		return err;
-	err = nvgpu_mutex_init(&c->error_notifier_mutex);
-	if (err)
-		goto fail_1;
 	err = nvgpu_mutex_init(&c->joblist.cleanup_lock);
 	if (err)
-		goto fail_2;
+		goto fail_1;
 	err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
 	if (err)
-		goto fail_3;
+		goto fail_2;
 	err = nvgpu_mutex_init(&c->sync_lock);
 	if (err)
-		goto fail_4;
+		goto fail_3;
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
 	if (err)
-		goto fail_5;
+		goto fail_4;
 	err = nvgpu_mutex_init(&c->cs_client_mutex);
 	if (err)
-		goto fail_6;
+		goto fail_5;
 #endif
 	err = nvgpu_mutex_init(&c->event_id_list_lock);
 	if (err)
-		goto fail_7;
+		goto fail_6;
 	err = nvgpu_mutex_init(&c->dbg_s_lock);
 	if (err)
-		goto fail_8;
+		goto fail_7;
 	nvgpu_list_add(&c->free_chs, &g->fifo.free_chs);

 	return 0;

-fail_8:
-	nvgpu_mutex_destroy(&c->event_id_list_lock);
 fail_7:
+	nvgpu_mutex_destroy(&c->event_id_list_lock);
+fail_6:
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	nvgpu_mutex_destroy(&c->cs_client_mutex);
-fail_6:
-	nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
 fail_5:
+	nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
+fail_4:
 #endif
 	nvgpu_mutex_destroy(&c->sync_lock);
-fail_4:
-	nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
 fail_3:
-	nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
+	nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
 fail_2:
-	nvgpu_mutex_destroy(&c->error_notifier_mutex);
+	nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
 fail_1:
 	nvgpu_mutex_destroy(&c->ioctl_lock);

--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h

@@ -273,11 +273,6 @@ struct channel_gk20a {
 	bool timeout_debug_dump;
 	unsigned int timeslice_us;

-	struct dma_buf *error_notifier_ref;
-	struct nvgpu_notification *error_notifier;
-	void *error_notifier_va;
-	struct nvgpu_mutex error_notifier_mutex;
-
 	struct nvgpu_mutex sync_lock;
 	struct gk20a_channel_sync *sync;
@@ -335,8 +330,6 @@ bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
 void gk20a_disable_channel(struct channel_gk20a *ch);
 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
 void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
-void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
-void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error);
 void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
 			 struct priv_cmd_entry *entry);

--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c

@@ -39,6 +39,7 @@
 #include <nvgpu/nvhost.h>
 #include <nvgpu/barrier.h>
 #include <nvgpu/ctxsw_trace.h>
+#include <nvgpu/error_notifier.h>

 #include "gk20a.h"
 #include "mm_gk20a.h"
@@ -557,7 +558,6 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 		nvgpu_mutex_destroy(&tsg->event_id_list_lock);

 	nvgpu_mutex_destroy(&c->ioctl_lock);
-	nvgpu_mutex_destroy(&c->error_notifier_mutex);
 	nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
 	nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
 	nvgpu_mutex_destroy(&c->sync_lock);
@@ -1339,14 +1339,10 @@ static bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g,
 	if (!refch)
 		return verbose;

-	nvgpu_mutex_acquire(&refch->error_notifier_mutex);
-	if (refch->error_notifier_ref) {
-		u32 err = refch->error_notifier->info32;
-		if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
-			verbose = refch->timeout_debug_dump;
-	}
-	nvgpu_mutex_release(&refch->error_notifier_mutex);
+	if (nvgpu_is_error_notifier_set(refch,
+			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT))
+		verbose = refch->timeout_debug_dump;

 	return verbose;
 }
@@ -1400,8 +1396,8 @@ void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
 {
 	nvgpu_err(g,
 		"channel %d generated a mmu fault", refch->chid);
-	gk20a_set_error_notifier(refch,
-		NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
+	nvgpu_set_error_notifier(refch,
+		NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
 }
void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
@@ -1939,7 +1935,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
 		list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
 			if (gk20a_channel_get(ch_tsg)) {
-				gk20a_set_error_notifier(ch_tsg, err_code);
+				nvgpu_set_error_notifier(ch_tsg, err_code);
 				gk20a_channel_put(ch_tsg);
 			}
 		}
@@ -1947,7 +1943,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
 		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
 		gk20a_fifo_recover_tsg(g, ch->tsgid, verbose);
 	} else {
-		gk20a_set_error_notifier(ch, err_code);
+		nvgpu_set_error_notifier(ch, err_code);
 		gk20a_fifo_recover_ch(g, ch->chid, verbose);
 	}
@@ -2108,8 +2104,8 @@ static bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch,
 		*verbose = ch->timeout_debug_dump;
 		*ms = ch->timeout_accumulated_ms;
 		if (recover)
-			gk20a_set_error_notifier(ch,
-				NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+			nvgpu_set_error_notifier(ch,
+				NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 	gk20a_channel_put(ch);
 }
@@ -2170,8 +2166,8 @@ bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg,
 			gk20a_channel_put(ch);
 		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
 			if (gk20a_channel_get(ch)) {
-				gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+				nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 				*verbose |= ch->timeout_debug_dump;
 				gk20a_channel_put(ch);
 			}
@@ -2413,7 +2409,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
 			rc_type = RC_TYPE_PBDMA_FAULT;
 			nvgpu_err(g,
 				"semaphore acquire timeout!");
-			*error_notifier = NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
+			*error_notifier = NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT;
 		}
 		*handled |= pbdma_intr_0_acquire_pending_f();
 	}
@@ -2431,7 +2427,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
 	if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) {
 		*error_notifier =
-			NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
+			NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH;
 		rc_type = RC_TYPE_PBDMA_FAULT;
 	}
@@ -2485,7 +2481,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
 		struct channel_gk20a *ch = &f->channel[id];

 		if (gk20a_channel_get(ch)) {
-			gk20a_set_error_notifier(ch, error_notifier);
+			nvgpu_set_error_notifier(ch, error_notifier);
 			gk20a_fifo_recover_ch(g, id, true);
 			gk20a_channel_put(ch);
 		}
@@ -2497,7 +2493,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
 		nvgpu_rwsem_down_read(&tsg->ch_list_lock);
 		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
 			if (gk20a_channel_get(ch)) {
-				gk20a_set_error_notifier(ch,
+				nvgpu_set_error_notifier(ch,
 					error_notifier);
 				gk20a_channel_put(ch);
 			}
@@ -2514,7 +2510,7 @@ u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
 	u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
 	u32 handled = 0;
-	u32 error_notifier = NVGPU_CHANNEL_PBDMA_ERROR;
+	u32 error_notifier = NVGPU_ERR_NOTIFIER_PBDMA_ERROR;
 	unsigned int rc_type = RC_TYPE_NO_RC;

 	if (pbdma_intr_0) {
@@ -2658,8 +2654,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
 		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
 			if (!gk20a_channel_get(ch))
 				continue;
-			gk20a_set_error_notifier(ch,
-				NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+			nvgpu_set_error_notifier(ch,
+				NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 			gk20a_channel_put(ch);
 		}
 		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
@@ -2671,8 +2667,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
 			"preempt channel %d timeout", id);

 		if (gk20a_channel_get(ch)) {
-			gk20a_set_error_notifier(ch,
-				NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+			nvgpu_set_error_notifier(ch,
+				NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 			gk20a_fifo_recover_ch(g, id, true);
 			gk20a_channel_put(ch);
 		}

--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c

@@ -40,6 +40,7 @@
 #include <nvgpu/barrier.h>
 #include <nvgpu/mm.h>
 #include <nvgpu/ctxsw_trace.h>
+#include <nvgpu/error_notifier.h>

 #include "gk20a.h"
 #include "gr_ctx_gk20a.h"
@@ -5113,14 +5114,14 @@ static void gk20a_gr_set_error_notifier(struct gk20a *g,
 			nvgpu_rwsem_down_read(&tsg->ch_list_lock);
 			list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
 				if (gk20a_channel_get(ch_tsg)) {
-					gk20a_set_error_notifier(ch_tsg,
+					nvgpu_set_error_notifier(ch_tsg,
 						error_notifier);
 					gk20a_channel_put(ch_tsg);
 				}
 			}
 			nvgpu_rwsem_up_read(&tsg->ch_list_lock);
 		} else {
-			gk20a_set_error_notifier(ch, error_notifier);
+			nvgpu_set_error_notifier(ch, error_notifier);
 		}
 	}
 }
@@ -5130,7 +5131,7 @@ static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
 	gk20a_dbg_fn("");
 	gk20a_gr_set_error_notifier(g, isr_data,
-		NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
+		NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
 	nvgpu_err(g,
 		"gr semaphore timeout");
 	return -EINVAL;
@@ -5141,7 +5142,7 @@ static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g,
 	gk20a_dbg_fn("");
 	gk20a_gr_set_error_notifier(g, isr_data,
-		NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
+		NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
 	/* This is an unrecoverable error, reset is needed */
 	nvgpu_err(g,
 		"gr semaphore timeout");
@@ -5156,7 +5157,7 @@ static int gk20a_gr_handle_illegal_method(struct gk20a *g,
 		isr_data->data_lo);
 	if (ret) {
 		gk20a_gr_set_error_notifier(g, isr_data,
-			NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
+			NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
 		nvgpu_err(g, "invalid method class 0x%08x"
 			", offset 0x%08x address 0x%08x",
 			isr_data->class_num, isr_data->offset, isr_data->addr);
@@ -5169,7 +5170,7 @@ static int gk20a_gr_handle_illegal_class(struct gk20a *g,
 	gk20a_dbg_fn("");
 	gk20a_gr_set_error_notifier(g, isr_data,
-		NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 	nvgpu_err(g,
 		"invalid class 0x%08x, offset 0x%08x",
 		isr_data->class_num, isr_data->offset);
@@ -5193,7 +5194,7 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
 	if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) {
 		gk20a_gr_set_error_notifier(g, isr_data,
-			NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD);
+			NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD);
 		nvgpu_err(g,
 			"firmware method error 0x%08x for offset 0x%04x",
 			gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)),
@@ -5215,7 +5216,7 @@ static int gk20a_gr_handle_class_error(struct gk20a *g,
 	gr_class_error =
 		gr_class_error_code_v(gk20a_readl(g, gr_class_error_r()));
 	gk20a_gr_set_error_notifier(g, isr_data,
-		NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 	nvgpu_err(g, "class error 0x%08x, offset 0x%08x,"
 		"sub channel 0x%08x mme generated %d,"
 		" mme pc 0x%08xdata high %d priv status %d"
@@ -5244,7 +5245,7 @@ static int gk20a_gr_handle_firmware_method(struct gk20a *g,
 	gk20a_dbg_fn("");

 	gk20a_gr_set_error_notifier(g, isr_data,
-		NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 	nvgpu_err(g,
 		"firmware method 0x%08x, offset 0x%08x for channel %u",
 		isr_data->class_num, isr_data->offset,
@@ -6024,7 +6025,7 @@ int gk20a_gr_isr(struct gk20a *g)
 		if (need_reset) {
 			nvgpu_err(g, "set gr exception notifier");
 			gk20a_gr_set_error_notifier(g, &isr_data,
-				NVGPU_CHANNEL_GR_EXCEPTION);
+				NVGPU_ERR_NOTIFIER_GR_EXCEPTION);
 		}
 	}