diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c index 8366ed884..1ae2d4443 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ b/drivers/gpu/nvgpu/common/linux/channel.c @@ -17,6 +17,7 @@ #include #include #include +#include /* * This is required for nvgpu_vm_find_buf() which is used in the tracing @@ -37,6 +38,124 @@ #include #include +/* + * API to convert error_notifiers in common code and of the form + * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user + * space and of the form NVGPU_CHANNEL_* + */ +static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) +{ + switch (error_notifier) { + case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: + return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; + case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: + return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; + case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: + return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; + case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: + return NVGPU_CHANNEL_GR_EXCEPTION; + case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: + return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; + case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: + return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; + case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: + return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; + case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: + return NVGPU_CHANNEL_PBDMA_ERROR; + case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: + return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; + case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: + return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; + case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: + return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; + } + + pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier); + + return error_notifier; +} + +/** + * nvgpu_set_error_notifier_locked() + * Should be called with ch->error_notifier_mutex held + * + * error should be of the form NVGPU_ERR_NOTIFIER_* + */ +void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + error = nvgpu_error_notifier_to_channel_notifier(error); + + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + struct timespec time_data; + u64 nsec; + + getnstimeofday(&time_data); + nsec = ((u64)time_data.tv_sec) * 1000000000u + + (u64)time_data.tv_nsec; + notification->time_stamp.nanoseconds[0] = + (u32)nsec; + notification->time_stamp.nanoseconds[1] = + (u32)(nsec >> 32); + notification->info32 = error; + notification->status = 0xffff; + + nvgpu_err(ch->g, + "error notifier set to %d for ch %d", error, ch->chid); + } +} + +/* error should be of the form NVGPU_ERR_NOTIFIER_* */ +void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + nvgpu_set_error_notifier_locked(ch, error); + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + + /* Don't overwrite error flag if it is already set */ + if (notification->status != 0xffff) + nvgpu_set_error_notifier_locked(ch, error); + } + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ +bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + bool notifier_set = false; + + error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + u32 err = notification->info32; + + if (err == error_notifier) + notifier_set = true; + } + nvgpu_mutex_release(&priv->error_notifier.mutex); + + return notifier_set; +} + static void gk20a_channel_update_runcb_fn(struct work_struct *work) { struct nvgpu_channel_completion_cb *completion_cb = @@ -128,6 +247,7 @@ static void nvgpu_channel_close_linux(struct channel_gk20a *ch) static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) { struct nvgpu_channel_linux *priv; + int err; priv = nvgpu_kzalloc(g, sizeof(*priv)); if (!priv) @@ -136,6 +256,12 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) ch->os_priv = priv; priv->ch = ch; + err = nvgpu_mutex_init(&priv->error_notifier.mutex); + if (err) { + nvgpu_kfree(g, priv); + return err; + } + nvgpu_channel_work_completion_init(ch); return 0; @@ -143,7 +269,10 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) { - nvgpu_kfree(g, ch->os_priv); + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_destroy(&priv->error_notifier.mutex); + nvgpu_kfree(g, priv); } int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) diff --git a/drivers/gpu/nvgpu/common/linux/channel.h b/drivers/gpu/nvgpu/common/linux/channel.h index ba1935f37..a4df75d6f 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.h +++ b/drivers/gpu/nvgpu/common/linux/channel.h @@ -42,10 +42,20 @@ struct nvgpu_channel_completion_cb { struct work_struct work; }; +struct nvgpu_error_notifier { + struct dma_buf *dmabuf; + void *vaddr; + + struct nvgpu_notification *notification; + + struct nvgpu_mutex mutex; +}; + struct nvgpu_channel_linux { struct channel_gk20a *ch; struct nvgpu_channel_completion_cb completion_cb; + struct nvgpu_error_notifier error_notifier; }; int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l); diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index 0ac50140c..67bec31b2 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" @@ -227,15 +228,17 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch) { - nvgpu_mutex_acquire(&ch->error_notifier_mutex); - if (ch->error_notifier_ref) { - dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va); - dma_buf_put(ch->error_notifier_ref); - ch->error_notifier_ref = NULL; - ch->error_notifier = NULL; - ch->error_notifier_va = NULL; + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr); + dma_buf_put(priv->error_notifier.dmabuf); + priv->error_notifier.dmabuf = NULL; + priv->error_notifier.notification = NULL; + priv->error_notifier.vaddr = NULL; } - nvgpu_mutex_release(&ch->error_notifier_mutex); + nvgpu_mutex_release(&priv->error_notifier.mutex); } static int gk20a_init_error_notifier(struct channel_gk20a *ch, @@ -244,6 +247,7 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch, struct dma_buf *dmabuf; void *va; u64 end = args->offset + sizeof(struct nvgpu_notification); + struct nvgpu_channel_linux *priv = ch->os_priv; if (!args->mem) { pr_err("gk20a_init_error_notifier: invalid memory handle\n"); @@ -273,14 +277,15 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch, return -ENOMEM; } - ch->error_notifier = va + args->offset; - ch->error_notifier_va = va; - memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification)); + priv->error_notifier.notification = va + args->offset; + priv->error_notifier.vaddr = va; + memset(priv->error_notifier.notification, 0, + sizeof(struct nvgpu_notification)); /* set channel notifiers pointer */ - nvgpu_mutex_acquire(&ch->error_notifier_mutex); - ch->error_notifier_ref = dmabuf; - nvgpu_mutex_release(&ch->error_notifier_mutex); + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + priv->error_notifier.dmabuf = dmabuf; + nvgpu_mutex_release(&priv->error_notifier.mutex); return 0; } @@ -1361,7 +1366,7 @@ long gk20a_channel_ioctl(struct file *filp, break; } err = ch->g->ops.fifo.force_reset_ch(ch, - NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR, true); + NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true); gk20a_idle(ch->g); break; case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL: diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c index cdcecca5b..7a2a02e94 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c @@ -25,10 +25,13 @@ #include #include #include +#include #include "vgpu.h" #include "fifo_vgpu.h" +#include "common/linux/channel.h" + #include #include @@ -691,7 +694,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { if (gk20a_channel_get(ch_tsg)) { - gk20a_set_error_notifier(ch_tsg, err_code); + nvgpu_set_error_notifier(ch_tsg, err_code); ch_tsg->has_timedout = true; gk20a_channel_put(ch_tsg); } @@ -699,7 +702,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, nvgpu_rwsem_up_read(&tsg->ch_list_lock); } else { - gk20a_set_error_notifier(ch, err_code); + nvgpu_set_error_notifier(ch, err_code); ch->has_timedout = true; } @@ -716,19 +719,14 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g, struct channel_gk20a *ch) { - nvgpu_mutex_acquire(&ch->error_notifier_mutex); - if (ch->error_notifier_ref) { - if (ch->error_notifier->status == 0xffff) { - /* If error code is already set, this mmu fault - * was triggered as part of recovery from other - * error condition. - * Don't overwrite error flag. */ - } else { - gk20a_set_error_notifier_locked(ch, - NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); - } - } - nvgpu_mutex_release(&ch->error_notifier_mutex); + /* + * If error code is already set, this mmu fault + * was triggered as part of recovery from other + * error condition. + * Don't overwrite error flag. + */ + nvgpu_set_error_notifier_if_empty(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); /* mark channel as faulted */ ch->has_timedout = true; @@ -778,11 +776,11 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info) switch (info->type) { case TEGRA_VGPU_FIFO_INTR_PBDMA: - gk20a_set_error_notifier(ch, NVGPU_CHANNEL_PBDMA_ERROR); + nvgpu_set_error_notifier(ch, NVGPU_ERR_NOTIFIER_PBDMA_ERROR); break; case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); break; case TEGRA_VGPU_FIFO_INTR_MMU_FAULT: vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch); diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c index dd2ae3061..33551d17c 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c @@ -20,6 +20,7 @@ #include #include +#include #include "vgpu.h" #include "gr_vgpu.h" @@ -941,31 +942,31 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info) nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); break; case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT); break; case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD: break; case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_FECS_ERROR: break; case TEGRA_VGPU_GR_INTR_CLASS_ERROR: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_EXCEPTION: - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_SM_EXCEPTION: gk20a_dbg_gpu_post_events(ch); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index e01d6cdba..a04158612 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "gk20a.h" #include "dbg_gpu_gk20a.h" @@ -339,37 +340,6 @@ int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch, return ret ? ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true); } -/** - * gk20a_set_error_notifier_locked() - * Should be called with ch->error_notifier_mutex held - */ -void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error) -{ - if (ch->error_notifier_ref) { - struct timespec time_data; - u64 nsec; - getnstimeofday(&time_data); - nsec = ((u64)time_data.tv_sec) * 1000000000u + - (u64)time_data.tv_nsec; - ch->error_notifier->time_stamp.nanoseconds[0] = - (u32)nsec; - ch->error_notifier->time_stamp.nanoseconds[1] = - (u32)(nsec >> 32); - ch->error_notifier->info32 = error; - ch->error_notifier->status = 0xffff; - - nvgpu_err(ch->g, - "error notifier set to %d for ch %d", error, ch->chid); - } -} - -void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error) -{ - nvgpu_mutex_acquire(&ch->error_notifier_mutex); - gk20a_set_error_notifier_locked(ch, error); - nvgpu_mutex_release(&ch->error_notifier_mutex); -} - static void gk20a_wait_until_counter_is_N( struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value, struct nvgpu_cond *c, const char *caller, const char *counter_name) @@ -1550,7 +1520,7 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch) gk20a_gr_debug_dump(g); g->ops.fifo.force_reset_ch(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true); + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, true); } /** @@ -2210,53 +2180,48 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) err = nvgpu_mutex_init(&c->ioctl_lock); if (err) return err; - err = nvgpu_mutex_init(&c->error_notifier_mutex); - if (err) - goto fail_1; err = nvgpu_mutex_init(&c->joblist.cleanup_lock); if (err) - goto fail_2; + goto fail_1; err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); if (err) - goto fail_3; + goto fail_2; err = nvgpu_mutex_init(&c->sync_lock); if (err) - goto fail_4; + goto fail_3; #if defined(CONFIG_GK20A_CYCLE_STATS) err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); if (err) - goto fail_5; + goto fail_4; err = nvgpu_mutex_init(&c->cs_client_mutex); if (err) - goto fail_6; + goto fail_5; #endif err = nvgpu_mutex_init(&c->event_id_list_lock); if (err) - goto fail_7; + goto fail_6; err = nvgpu_mutex_init(&c->dbg_s_lock); if (err) - goto fail_8; + goto fail_7; nvgpu_list_add(&c->free_chs, &g->fifo.free_chs); return 0; -fail_8: - nvgpu_mutex_destroy(&c->event_id_list_lock); fail_7: + nvgpu_mutex_destroy(&c->event_id_list_lock); +fail_6: #if defined(CONFIG_GK20A_CYCLE_STATS) nvgpu_mutex_destroy(&c->cs_client_mutex); -fail_6: - nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); fail_5: + nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); +fail_4: #endif nvgpu_mutex_destroy(&c->sync_lock); -fail_4: - nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); fail_3: - nvgpu_mutex_destroy(&c->joblist.cleanup_lock); + nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); fail_2: - nvgpu_mutex_destroy(&c->error_notifier_mutex); + nvgpu_mutex_destroy(&c->joblist.cleanup_lock); fail_1: nvgpu_mutex_destroy(&c->ioctl_lock); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 8c9095b2a..ff96d0d7d 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -273,11 +273,6 @@ struct channel_gk20a { bool timeout_debug_dump; unsigned int timeslice_us; - struct dma_buf *error_notifier_ref; - struct nvgpu_notification *error_notifier; - void *error_notifier_va; - struct nvgpu_mutex error_notifier_mutex; - struct nvgpu_mutex sync_lock; struct gk20a_channel_sync *sync; @@ -335,8 +330,6 @@ bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, void gk20a_disable_channel(struct channel_gk20a *ch); void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt); void gk20a_channel_abort_clean_up(struct channel_gk20a *ch); -void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error); -void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error); void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events); int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size, struct priv_cmd_entry *entry); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 648a8c863..38aecc93b 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "gk20a.h" #include "mm_gk20a.h" @@ -557,7 +558,6 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f) nvgpu_mutex_destroy(&tsg->event_id_list_lock); nvgpu_mutex_destroy(&c->ioctl_lock); - nvgpu_mutex_destroy(&c->error_notifier_mutex); nvgpu_mutex_destroy(&c->joblist.cleanup_lock); nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); nvgpu_mutex_destroy(&c->sync_lock); @@ -1339,14 +1339,10 @@ static bool gk20a_fifo_ch_timeout_debug_dump_state(struct gk20a *g, if (!refch) return verbose; - nvgpu_mutex_acquire(&refch->error_notifier_mutex); - if (refch->error_notifier_ref) { - u32 err = refch->error_notifier->info32; + if (nvgpu_is_error_notifier_set(refch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) + verbose = refch->timeout_debug_dump; - if (err == NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT) - verbose = refch->timeout_debug_dump; - } - nvgpu_mutex_release(&refch->error_notifier_mutex); return verbose; } @@ -1400,8 +1396,8 @@ void gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g, { nvgpu_err(g, "channel %d generated a mmu fault", refch->chid); - gk20a_set_error_notifier(refch, - NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); + nvgpu_set_error_notifier(refch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); } void gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, @@ -1939,7 +1935,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { if (gk20a_channel_get(ch_tsg)) { - gk20a_set_error_notifier(ch_tsg, err_code); + nvgpu_set_error_notifier(ch_tsg, err_code); gk20a_channel_put(ch_tsg); } } @@ -1947,7 +1943,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, nvgpu_rwsem_up_read(&tsg->ch_list_lock); gk20a_fifo_recover_tsg(g, ch->tsgid, verbose); } else { - gk20a_set_error_notifier(ch, err_code); + nvgpu_set_error_notifier(ch, err_code); gk20a_fifo_recover_ch(g, ch->chid, verbose); } @@ -2108,8 +2104,8 @@ static bool gk20a_fifo_check_ch_ctxsw_timeout(struct channel_gk20a *ch, *verbose = ch->timeout_debug_dump; *ms = ch->timeout_accumulated_ms; if (recover) - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); gk20a_channel_put(ch); } @@ -2170,8 +2166,8 @@ bool gk20a_fifo_check_tsg_ctxsw_timeout(struct tsg_gk20a *tsg, gk20a_channel_put(ch); list_for_each_entry(ch, &tsg->ch_list, ch_entry) { if (gk20a_channel_get(ch)) { - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); *verbose |= ch->timeout_debug_dump; gk20a_channel_put(ch); } @@ -2413,7 +2409,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id, rc_type = RC_TYPE_PBDMA_FAULT; nvgpu_err(g, "semaphore acquire timeout!"); - *error_notifier = NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; + *error_notifier = NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT; } *handled |= pbdma_intr_0_acquire_pending_f(); } @@ -2431,7 +2427,7 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id, if (pbdma_intr_0 & pbdma_intr_0_pbcrc_pending_f()) { *error_notifier = - NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; + NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH; rc_type = RC_TYPE_PBDMA_FAULT; } @@ -2485,7 +2481,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g, struct channel_gk20a *ch = &f->channel[id]; if (gk20a_channel_get(ch)) { - gk20a_set_error_notifier(ch, error_notifier); + nvgpu_set_error_notifier(ch, error_notifier); gk20a_fifo_recover_ch(g, id, true); gk20a_channel_put(ch); } @@ -2497,7 +2493,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g, nvgpu_rwsem_down_read(&tsg->ch_list_lock); list_for_each_entry(ch, &tsg->ch_list, ch_entry) { if (gk20a_channel_get(ch)) { - gk20a_set_error_notifier(ch, + nvgpu_set_error_notifier(ch, error_notifier); gk20a_channel_put(ch); } @@ -2514,7 +2510,7 @@ u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f, u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id)); u32 handled = 0; - u32 error_notifier = NVGPU_CHANNEL_PBDMA_ERROR; + u32 error_notifier = NVGPU_ERR_NOTIFIER_PBDMA_ERROR; unsigned int rc_type = RC_TYPE_NO_RC; if (pbdma_intr_0) { @@ -2658,8 +2654,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id, list_for_each_entry(ch, &tsg->ch_list, ch_entry) { if (!gk20a_channel_get(ch)) continue; - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); gk20a_channel_put(ch); } nvgpu_rwsem_up_read(&tsg->ch_list_lock); @@ -2671,8 +2667,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id, "preempt channel %d timeout", id); if (gk20a_channel_get(ch)) { - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); gk20a_fifo_recover_ch(g, id, true); gk20a_channel_put(ch); } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 270d36d69..b3969b603 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "gk20a.h" #include "gr_ctx_gk20a.h" @@ -5113,14 +5114,14 @@ static void gk20a_gr_set_error_notifier(struct gk20a *g, nvgpu_rwsem_down_read(&tsg->ch_list_lock); list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { if (gk20a_channel_get(ch_tsg)) { - gk20a_set_error_notifier(ch_tsg, + nvgpu_set_error_notifier(ch_tsg, error_notifier); gk20a_channel_put(ch_tsg); } } nvgpu_rwsem_up_read(&tsg->ch_list_lock); } else { - gk20a_set_error_notifier(ch, error_notifier); + nvgpu_set_error_notifier(ch, error_notifier); } } } @@ -5130,7 +5131,7 @@ static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g, { gk20a_dbg_fn(""); gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT); + NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT); nvgpu_err(g, "gr semaphore timeout"); return -EINVAL; @@ -5141,7 +5142,7 @@ static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g, { gk20a_dbg_fn(""); gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); /* This is an unrecoverable error, reset is needed */ nvgpu_err(g, "gr semaphore timeout"); @@ -5156,7 +5157,7 @@ static int gk20a_gr_handle_illegal_method(struct gk20a *g, isr_data->data_lo); if (ret) { gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); nvgpu_err(g, "invalid method class 0x%08x" ", offset 0x%08x address 0x%08x", isr_data->class_num, isr_data->offset, isr_data->addr); @@ -5169,7 +5170,7 @@ static int gk20a_gr_handle_illegal_class(struct gk20a *g, { gk20a_dbg_fn(""); gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); nvgpu_err(g, "invalid class 0x%08x, offset 0x%08x", isr_data->class_num, isr_data->offset); @@ -5193,7 +5194,7 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch, if (gr_fecs_intr & gr_fecs_host_int_status_umimp_firmware_method_f(1)) { gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD); + NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD); nvgpu_err(g, "firmware method error 0x%08x for offset 0x%04x", gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)), @@ -5215,7 +5216,7 @@ static int gk20a_gr_handle_class_error(struct gk20a *g, gr_class_error = gr_class_error_code_v(gk20a_readl(g, gr_class_error_r())); gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); nvgpu_err(g, "class error 0x%08x, offset 0x%08x," "sub channel 0x%08x mme generated %d," " mme pc 0x%08xdata high %d priv status %d" @@ -5244,7 +5245,7 @@ static int gk20a_gr_handle_firmware_method(struct gk20a *g, gk20a_dbg_fn(""); gk20a_gr_set_error_notifier(g, isr_data, - NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); nvgpu_err(g, "firmware method 0x%08x, offset 0x%08x for channel %u", isr_data->class_num, isr_data->offset, @@ -6024,7 +6025,7 @@ int gk20a_gr_isr(struct gk20a *g) if (need_reset) { nvgpu_err(g, "set gr exception notifier"); gk20a_gr_set_error_notifier(g, &isr_data, - NVGPU_CHANNEL_GR_EXCEPTION); + NVGPU_ERR_NOTIFIER_GR_EXCEPTION); } } diff --git a/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h b/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h new file mode 100644 index 000000000..3e70c3799 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/error_notifier.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVGPU_ERROR_NOTIFIER_H__ +#define __NVGPU_ERROR_NOTIFIER_H__ + +#include + +struct channel_gk20a; + +enum { + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT = 0, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY, + NVGPU_ERR_NOTIFIER_GR_EXCEPTION, + NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT, + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT, + NVGPU_ERR_NOTIFIER_PBDMA_ERROR, + NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD, + NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, + NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH, +}; + +void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error); +void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error); +void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error); +bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier); + +#endif /* __NVGPU_ERROR_NOTIFIER_H__ */