From 1f54ea09e3445d9ca3cf7a69b4967849cc9defc8 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Fri, 19 Oct 2018 12:08:46 -0700 Subject: [PATCH] gpu: nvgpu: rename has_timedout and make it thread safe Currently has_timedout variable is protected by wmb at places where it is being set and there is no corresponding rmb whenever has_timedout variable is read. This is prone to errors for concurrent execution. This change is supposed to fix this issue. Rename has_timedout variable of channel struct to ch_timedout. Also to avoid rmb every time ch_timedout is read, the ch_timedout_lock spinlock is added to protect ch_timedout variable for taking care of concurrent execution. Bug 2404865 Bug 2092051 Change-Id: I0bee9f50af0a48720aa8b54cbc3af97ef9f6df00 Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1930935 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/channel.c | 26 +++++++++++++++++++--- drivers/gpu/nvgpu/common/fifo/submit.c | 4 ++-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 8 +++---- drivers/gpu/nvgpu/include/nvgpu/channel.h | 6 ++++- drivers/gpu/nvgpu/os/linux/cde.c | 5 +++-- drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 17 +++++++++----- drivers/gpu/nvgpu/os/linux/ioctl_tsg.c | 2 +- drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 8 +++---- drivers/gpu/nvgpu/vgpu/vgpu.c | 2 +- 9 files changed, 54 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index d8c9487ab..33ce8d0fc 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -212,6 +212,24 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) gk20a_channel_update(ch); } +void gk20a_channel_set_timedout(struct channel_gk20a *ch) +{ + nvgpu_spinlock_acquire(&ch->ch_timedout_lock); + ch->ch_timedout = true; + nvgpu_spinlock_release(&ch->ch_timedout_lock); +} + +bool gk20a_channel_check_timedout(struct channel_gk20a *ch) +{ + bool 
ch_timedout_status; + + nvgpu_spinlock_acquire(&ch->ch_timedout_lock); + ch_timedout_status = ch->ch_timedout; + nvgpu_spinlock_release(&ch->ch_timedout_lock); + + return ch_timedout_status; +} + void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt) { nvgpu_log_fn(ch->g, " "); @@ -221,7 +239,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt) } /* make sure new kickoffs are prevented */ - ch->has_timedout = true; + gk20a_channel_set_timedout(ch); ch->g->ops.fifo.disable_channel(ch); @@ -423,7 +441,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) * Set user managed syncpoint to safe state * But it's already done if channel has timedout */ - if (ch->has_timedout) { + if (gk20a_channel_check_timedout(ch)) { nvgpu_channel_sync_destroy(ch->user_sync, false); } else { nvgpu_channel_sync_destroy(ch->user_sync, true); @@ -697,7 +715,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, /* set gr host default timeout */ ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g); ch->timeout_debug_dump = true; - ch->has_timedout = false; + ch->ch_timedout = false; /* init kernel watchdog timeout */ ch->timeout.enabled = true; @@ -2187,6 +2205,8 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) c->referenceable = false; nvgpu_cond_init(&c->ref_count_dec_wq); + nvgpu_spinlock_init(&c->ch_timedout_lock); + #if GK20A_CHANNEL_REFCOUNT_TRACKING nvgpu_spinlock_init(&c->ref_actions_lock); #endif diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index 81f8b5605..886264533 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -348,7 +348,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c, return -ENODEV; } - if (c->has_timedout) { + if (gk20a_channel_check_timedout(c)) { return -ETIMEDOUT; } @@ -513,7 +513,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c, } } - if 
(c->has_timedout) { + if (gk20a_channel_check_timedout(c)) { err = -ETIMEDOUT; goto clean_up; } diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index f3a2245fc..cda44b360 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -1383,8 +1383,8 @@ static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g, { if (refch != NULL) { /* mark channel as faulted */ - refch->has_timedout = true; - nvgpu_smp_wmb(); + gk20a_channel_set_timedout(refch); + /* unblock pending waits */ nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq); nvgpu_cond_broadcast_interruptible(&refch->notifier_wq); @@ -1468,7 +1468,7 @@ void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid, bool preempt) nvgpu_rwsem_down_read(&tsg->ch_list_lock); nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) { if (gk20a_channel_get(ch) != NULL) { - ch->has_timedout = true; + gk20a_channel_set_timedout(ch); if (ch->g->ops.fifo.ch_abort_clean_up != NULL) { ch->g->ops.fifo.ch_abort_clean_up(ch); } @@ -2093,7 +2093,7 @@ int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch) /* If one channel in TSG times out, we disable all channels */ nvgpu_rwsem_down_write(&tsg->ch_list_lock); - tsg_timedout = ch->has_timedout; + tsg_timedout = gk20a_channel_check_timedout(ch); nvgpu_rwsem_up_write(&tsg->ch_list_lock); /* Disable TSG and examine status before unbinding channel */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index ddb21c7ef..aa941d8d9 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -300,6 +300,8 @@ struct channel_gk20a { struct nvgpu_mem ctx_header; + struct nvgpu_spinlock ch_timedout_lock; + bool ch_timedout; /* Any operating system specific data. 
*/ void *os_priv; @@ -315,7 +317,6 @@ struct channel_gk20a { u32 runlist_id; bool mmu_nack_handled; - bool has_timedout; bool referenceable; bool vpr; bool deterministic; @@ -461,4 +462,7 @@ static inline void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) } #endif +void gk20a_channel_set_timedout(struct channel_gk20a *ch); +bool gk20a_channel_check_timedout(struct channel_gk20a *ch); + #endif diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c index 98a5f8686..df80bbd3e 100644 --- a/drivers/gpu/nvgpu/os/linux/cde.c +++ b/drivers/gpu/nvgpu/os/linux/cde.c @@ -1275,7 +1275,7 @@ __releases(&cde_app->mutex) nvgpu_log_info(g, "double finish cde context %p on channel %p", cde_ctx, ch); - if (ch->has_timedout) { + if (gk20a_channel_check_timedout(ch)) { if (cde_ctx->is_temporary) { nvgpu_warn(g, "cde: channel had timed out" @@ -1302,8 +1302,9 @@ __releases(&cde_app->mutex) msecs_to_jiffies(CTX_DELETE_TIME)); } - if (!ch->has_timedout) + if (!gk20a_channel_check_timedout(ch)) { gk20a_cde_ctx_release(cde_ctx); + } } static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c index 9fd7aa677..5133074b7 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c @@ -661,8 +661,9 @@ static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, int ret = 0; /* do not wait if channel has timed out */ - if (ch->has_timedout) + if (gk20a_channel_check_timedout(ch)) { return -ETIMEDOUT; + } dmabuf = dma_buf_get(id); if (IS_ERR(dmabuf)) { @@ -681,7 +682,8 @@ static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, ret = NVGPU_COND_WAIT_INTERRUPTIBLE( &ch->semaphore_wq, - *semaphore == payload || ch->has_timedout, + *semaphore == payload || + gk20a_channel_check_timedout(ch), timeout); dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); @@ -705,8 +707,9 @@ static int 
gk20a_channel_wait(struct channel_gk20a *ch, nvgpu_log_fn(g, " "); - if (ch->has_timedout) + if (gk20a_channel_check_timedout(ch)) { return -ETIMEDOUT; + } switch (args->type) { case NVGPU_WAIT_TYPE_NOTIFIER: @@ -741,7 +744,8 @@ static int gk20a_channel_wait(struct channel_gk20a *ch, * calling this ioctl */ remain = NVGPU_COND_WAIT_INTERRUPTIBLE( &ch->notifier_wq, - notif->status == 0 || ch->has_timedout, + notif->status == 0 || + gk20a_channel_check_timedout(ch), args->timeout); if (remain == 0 && notif->status != 0) { @@ -811,8 +815,9 @@ static int gk20a_ioctl_channel_submit_gpfifo( profile = gk20a_fifo_profile_acquire(ch->g); gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY); - if (ch->has_timedout) + if (gk20a_channel_check_timedout(ch)) { return -ETIMEDOUT; + } nvgpu_get_fence_args(&args->fence, &fence); submit_flags = @@ -1309,7 +1314,7 @@ long gk20a_channel_ioctl(struct file *filp, } case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT: ((struct nvgpu_get_param_args *)buf)->value = - ch->has_timedout; + gk20a_channel_check_timedout(ch); break; case NVGPU_IOCTL_CHANNEL_ENABLE: err = gk20a_busy(ch->g); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c index f4118ccfc..513a35c4f 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c @@ -141,7 +141,7 @@ static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) * Mark the channel timedout since channel unbound from TSG * has no context of its own so it can't serve any job */ - ch->has_timedout = true; + gk20a_channel_set_timedout(ch); out: gk20a_channel_put(ch); diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 75cd2e86c..73ba84336 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -655,7 +655,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, if (gk20a_channel_get(ch_tsg)) { g->ops.fifo.set_error_notifier(ch_tsg, err_code); - 
ch_tsg->has_timedout = true; + gk20a_channel_set_timedout(ch_tsg); gk20a_channel_put(ch_tsg); } } @@ -663,7 +663,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, nvgpu_rwsem_up_read(&tsg->ch_list_lock); } else { g->ops.fifo.set_error_notifier(ch, err_code); - ch->has_timedout = true; + gk20a_channel_set_timedout(ch); } msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FORCE_RESET; @@ -689,8 +689,8 @@ static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g, NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); /* mark channel as faulted */ - ch->has_timedout = true; - nvgpu_smp_wmb(); + gk20a_channel_set_timedout(ch); + /* unblock pending waits */ nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index 9fa8eb78f..6d3ee68c8 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -127,7 +127,7 @@ static void vgpu_channel_abort_cleanup(struct gk20a *g, u32 chid) return; } - ch->has_timedout = true; + gk20a_channel_set_timedout(ch); g->ops.fifo.ch_abort_clean_up(ch); gk20a_channel_put(ch); }