mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: rename has_timedout and make it thread safe
Currently the has_timedout variable is protected by a wmb at the places where it is set, but there is no corresponding rmb wherever has_timedout is read. This is error-prone under concurrent execution, and this change is intended to fix that issue. Rename the has_timedout variable of the channel struct to ch_timedout. Also, to avoid needing an rmb every time ch_timedout is read, add a spinlock, ch_timedout_lock, that protects the ch_timedout variable against concurrent access. Bug 2404865 Bug 2092051 Change-Id: I0bee9f50af0a48720aa8b54cbc3af97ef9f6df00 Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1930935 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
503b897b45
commit
1f54ea09e3
@@ -212,6 +212,24 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
|
|||||||
gk20a_channel_update(ch);
|
gk20a_channel_update(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Mark the channel as timed out: sets ch->ch_timedout to true while
 * holding ch->ch_timedout_lock.
 */
void gk20a_channel_set_timedout(struct channel_gk20a *ch)
|
||||||
|
{
|
||||||
|
nvgpu_spinlock_acquire(&ch->ch_timedout_lock);
|
||||||
|
ch->ch_timedout = true;
|
||||||
|
nvgpu_spinlock_release(&ch->ch_timedout_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Return the current value of ch->ch_timedout. The flag is copied into a
 * local while ch->ch_timedout_lock is held, and that copy is returned
 * after the lock has been released.
 */
bool gk20a_channel_check_timedout(struct channel_gk20a *ch)
|
||||||
|
{
|
||||||
|
bool ch_timedout_status;
|
||||||
|
|
||||||
|
nvgpu_spinlock_acquire(&ch->ch_timedout_lock);
|
||||||
|
ch_timedout_status = ch->ch_timedout;
|
||||||
|
nvgpu_spinlock_release(&ch->ch_timedout_lock);
|
||||||
|
|
||||||
|
return ch_timedout_status;
|
||||||
|
}
|
||||||
|
|
||||||
void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
|
void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
|
||||||
{
|
{
|
||||||
nvgpu_log_fn(ch->g, " ");
|
nvgpu_log_fn(ch->g, " ");
|
||||||
@@ -221,7 +239,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* make sure new kickoffs are prevented */
|
/* make sure new kickoffs are prevented */
|
||||||
ch->has_timedout = true;
|
gk20a_channel_set_timedout(ch);
|
||||||
|
|
||||||
ch->g->ops.fifo.disable_channel(ch);
|
ch->g->ops.fifo.disable_channel(ch);
|
||||||
|
|
||||||
@@ -423,7 +441,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
|
|||||||
* Set user managed syncpoint to safe state
|
* Set user managed syncpoint to safe state
|
||||||
* But it's already done if channel has timedout
|
* But it's already done if channel has timedout
|
||||||
*/
|
*/
|
||||||
if (ch->has_timedout) {
|
if (gk20a_channel_check_timedout(ch)) {
|
||||||
nvgpu_channel_sync_destroy(ch->user_sync, false);
|
nvgpu_channel_sync_destroy(ch->user_sync, false);
|
||||||
} else {
|
} else {
|
||||||
nvgpu_channel_sync_destroy(ch->user_sync, true);
|
nvgpu_channel_sync_destroy(ch->user_sync, true);
|
||||||
@@ -697,7 +715,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
|
|||||||
/* set gr host default timeout */
|
/* set gr host default timeout */
|
||||||
ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
|
ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
|
||||||
ch->timeout_debug_dump = true;
|
ch->timeout_debug_dump = true;
|
||||||
ch->has_timedout = false;
|
ch->ch_timedout = false;
|
||||||
|
|
||||||
/* init kernel watchdog timeout */
|
/* init kernel watchdog timeout */
|
||||||
ch->timeout.enabled = true;
|
ch->timeout.enabled = true;
|
||||||
@@ -2187,6 +2205,8 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
|
|||||||
c->referenceable = false;
|
c->referenceable = false;
|
||||||
nvgpu_cond_init(&c->ref_count_dec_wq);
|
nvgpu_cond_init(&c->ref_count_dec_wq);
|
||||||
|
|
||||||
|
nvgpu_spinlock_init(&c->ch_timedout_lock);
|
||||||
|
|
||||||
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
||||||
nvgpu_spinlock_init(&c->ref_actions_lock);
|
nvgpu_spinlock_init(&c->ref_actions_lock);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -348,7 +348,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c->has_timedout) {
|
if (gk20a_channel_check_timedout(c)) {
|
||||||
return -ETIMEDOUT;
|
return -ETIMEDOUT;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -513,7 +513,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c->has_timedout) {
|
if (gk20a_channel_check_timedout(c)) {
|
||||||
err = -ETIMEDOUT;
|
err = -ETIMEDOUT;
|
||||||
goto clean_up;
|
goto clean_up;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1383,8 +1383,8 @@ static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
|
|||||||
{
|
{
|
||||||
if (refch != NULL) {
|
if (refch != NULL) {
|
||||||
/* mark channel as faulted */
|
/* mark channel as faulted */
|
||||||
refch->has_timedout = true;
|
gk20a_channel_set_timedout(refch);
|
||||||
nvgpu_smp_wmb();
|
|
||||||
/* unblock pending waits */
|
/* unblock pending waits */
|
||||||
nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
|
nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
|
||||||
nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
|
nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
|
||||||
@@ -1468,7 +1468,7 @@ void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid, bool preempt)
|
|||||||
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
|
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
|
||||||
nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
|
nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
|
||||||
if (gk20a_channel_get(ch) != NULL) {
|
if (gk20a_channel_get(ch) != NULL) {
|
||||||
ch->has_timedout = true;
|
gk20a_channel_set_timedout(ch);
|
||||||
if (ch->g->ops.fifo.ch_abort_clean_up != NULL) {
|
if (ch->g->ops.fifo.ch_abort_clean_up != NULL) {
|
||||||
ch->g->ops.fifo.ch_abort_clean_up(ch);
|
ch->g->ops.fifo.ch_abort_clean_up(ch);
|
||||||
}
|
}
|
||||||
@@ -2093,7 +2093,7 @@ int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch)
|
|||||||
|
|
||||||
/* If one channel in TSG times out, we disable all channels */
|
/* If one channel in TSG times out, we disable all channels */
|
||||||
nvgpu_rwsem_down_write(&tsg->ch_list_lock);
|
nvgpu_rwsem_down_write(&tsg->ch_list_lock);
|
||||||
tsg_timedout = ch->has_timedout;
|
tsg_timedout = gk20a_channel_check_timedout(ch);
|
||||||
nvgpu_rwsem_up_write(&tsg->ch_list_lock);
|
nvgpu_rwsem_up_write(&tsg->ch_list_lock);
|
||||||
|
|
||||||
/* Disable TSG and examine status before unbinding channel */
|
/* Disable TSG and examine status before unbinding channel */
|
||||||
|
|||||||
@@ -300,6 +300,8 @@ struct channel_gk20a {
|
|||||||
|
|
||||||
struct nvgpu_mem ctx_header;
|
struct nvgpu_mem ctx_header;
|
||||||
|
|
||||||
|
struct nvgpu_spinlock ch_timedout_lock;
|
||||||
|
bool ch_timedout;
|
||||||
/* Any operating system specific data. */
|
/* Any operating system specific data. */
|
||||||
void *os_priv;
|
void *os_priv;
|
||||||
|
|
||||||
@@ -315,7 +317,6 @@ struct channel_gk20a {
|
|||||||
u32 runlist_id;
|
u32 runlist_id;
|
||||||
|
|
||||||
bool mmu_nack_handled;
|
bool mmu_nack_handled;
|
||||||
bool has_timedout;
|
|
||||||
bool referenceable;
|
bool referenceable;
|
||||||
bool vpr;
|
bool vpr;
|
||||||
bool deterministic;
|
bool deterministic;
|
||||||
@@ -461,4 +462,7 @@ static inline void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
void gk20a_channel_set_timedout(struct channel_gk20a *ch);
|
||||||
|
bool gk20a_channel_check_timedout(struct channel_gk20a *ch);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1275,7 +1275,7 @@ __releases(&cde_app->mutex)
|
|||||||
nvgpu_log_info(g, "double finish cde context %p on channel %p",
|
nvgpu_log_info(g, "double finish cde context %p on channel %p",
|
||||||
cde_ctx, ch);
|
cde_ctx, ch);
|
||||||
|
|
||||||
if (ch->has_timedout) {
|
if (gk20a_channel_check_timedout(ch)) {
|
||||||
if (cde_ctx->is_temporary) {
|
if (cde_ctx->is_temporary) {
|
||||||
nvgpu_warn(g,
|
nvgpu_warn(g,
|
||||||
"cde: channel had timed out"
|
"cde: channel had timed out"
|
||||||
@@ -1302,9 +1302,10 @@ __releases(&cde_app->mutex)
|
|||||||
msecs_to_jiffies(CTX_DELETE_TIME));
|
msecs_to_jiffies(CTX_DELETE_TIME));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ch->has_timedout)
|
if (!gk20a_channel_check_timedout(ch)) {
|
||||||
gk20a_cde_ctx_release(cde_ctx);
|
gk20a_cde_ctx_release(cde_ctx);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
|
static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -661,8 +661,9 @@ static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
|
|||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
/* do not wait if channel has timed out */
|
/* do not wait if channel has timed out */
|
||||||
if (ch->has_timedout)
|
if (gk20a_channel_check_timedout(ch)) {
|
||||||
return -ETIMEDOUT;
|
return -ETIMEDOUT;
|
||||||
|
}
|
||||||
|
|
||||||
dmabuf = dma_buf_get(id);
|
dmabuf = dma_buf_get(id);
|
||||||
if (IS_ERR(dmabuf)) {
|
if (IS_ERR(dmabuf)) {
|
||||||
@@ -681,7 +682,8 @@ static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
|
|||||||
|
|
||||||
ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
|
ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
|
||||||
&ch->semaphore_wq,
|
&ch->semaphore_wq,
|
||||||
*semaphore == payload || ch->has_timedout,
|
*semaphore == payload ||
|
||||||
|
gk20a_channel_check_timedout(ch),
|
||||||
timeout);
|
timeout);
|
||||||
|
|
||||||
dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
|
dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
|
||||||
@@ -705,8 +707,9 @@ static int gk20a_channel_wait(struct channel_gk20a *ch,
|
|||||||
|
|
||||||
nvgpu_log_fn(g, " ");
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
if (ch->has_timedout)
|
if (gk20a_channel_check_timedout(ch)) {
|
||||||
return -ETIMEDOUT;
|
return -ETIMEDOUT;
|
||||||
|
}
|
||||||
|
|
||||||
switch (args->type) {
|
switch (args->type) {
|
||||||
case NVGPU_WAIT_TYPE_NOTIFIER:
|
case NVGPU_WAIT_TYPE_NOTIFIER:
|
||||||
@@ -741,7 +744,8 @@ static int gk20a_channel_wait(struct channel_gk20a *ch,
|
|||||||
* calling this ioctl */
|
* calling this ioctl */
|
||||||
remain = NVGPU_COND_WAIT_INTERRUPTIBLE(
|
remain = NVGPU_COND_WAIT_INTERRUPTIBLE(
|
||||||
&ch->notifier_wq,
|
&ch->notifier_wq,
|
||||||
notif->status == 0 || ch->has_timedout,
|
notif->status == 0 ||
|
||||||
|
gk20a_channel_check_timedout(ch),
|
||||||
args->timeout);
|
args->timeout);
|
||||||
|
|
||||||
if (remain == 0 && notif->status != 0) {
|
if (remain == 0 && notif->status != 0) {
|
||||||
@@ -811,8 +815,9 @@ static int gk20a_ioctl_channel_submit_gpfifo(
|
|||||||
profile = gk20a_fifo_profile_acquire(ch->g);
|
profile = gk20a_fifo_profile_acquire(ch->g);
|
||||||
gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY);
|
gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY);
|
||||||
|
|
||||||
if (ch->has_timedout)
|
if (gk20a_channel_check_timedout(ch)) {
|
||||||
return -ETIMEDOUT;
|
return -ETIMEDOUT;
|
||||||
|
}
|
||||||
|
|
||||||
nvgpu_get_fence_args(&args->fence, &fence);
|
nvgpu_get_fence_args(&args->fence, &fence);
|
||||||
submit_flags =
|
submit_flags =
|
||||||
@@ -1309,7 +1314,7 @@ long gk20a_channel_ioctl(struct file *filp,
|
|||||||
}
|
}
|
||||||
case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
|
case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
|
||||||
((struct nvgpu_get_param_args *)buf)->value =
|
((struct nvgpu_get_param_args *)buf)->value =
|
||||||
ch->has_timedout;
|
gk20a_channel_check_timedout(ch);
|
||||||
break;
|
break;
|
||||||
case NVGPU_IOCTL_CHANNEL_ENABLE:
|
case NVGPU_IOCTL_CHANNEL_ENABLE:
|
||||||
err = gk20a_busy(ch->g);
|
err = gk20a_busy(ch->g);
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
|
|||||||
* Mark the channel timedout since channel unbound from TSG
|
* Mark the channel timedout since channel unbound from TSG
|
||||||
* has no context of its own so it can't serve any job
|
* has no context of its own so it can't serve any job
|
||||||
*/
|
*/
|
||||||
ch->has_timedout = true;
|
gk20a_channel_set_timedout(ch);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
gk20a_channel_put(ch);
|
gk20a_channel_put(ch);
|
||||||
|
|||||||
@@ -655,7 +655,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
|
|||||||
if (gk20a_channel_get(ch_tsg)) {
|
if (gk20a_channel_get(ch_tsg)) {
|
||||||
g->ops.fifo.set_error_notifier(ch_tsg,
|
g->ops.fifo.set_error_notifier(ch_tsg,
|
||||||
err_code);
|
err_code);
|
||||||
ch_tsg->has_timedout = true;
|
gk20a_channel_set_timedout(ch_tsg);
|
||||||
gk20a_channel_put(ch_tsg);
|
gk20a_channel_put(ch_tsg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -663,7 +663,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
|
|||||||
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
|
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
|
||||||
} else {
|
} else {
|
||||||
g->ops.fifo.set_error_notifier(ch, err_code);
|
g->ops.fifo.set_error_notifier(ch, err_code);
|
||||||
ch->has_timedout = true;
|
gk20a_channel_set_timedout(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FORCE_RESET;
|
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FORCE_RESET;
|
||||||
@@ -689,8 +689,8 @@ static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
|
|||||||
NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
|
NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
|
||||||
|
|
||||||
/* mark channel as faulted */
|
/* mark channel as faulted */
|
||||||
ch->has_timedout = true;
|
gk20a_channel_set_timedout(ch);
|
||||||
nvgpu_smp_wmb();
|
|
||||||
/* unblock pending waits */
|
/* unblock pending waits */
|
||||||
nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
|
nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
|
||||||
nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
|
nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
|
||||||
|
|||||||
@@ -127,7 +127,7 @@ static void vgpu_channel_abort_cleanup(struct gk20a *g, u32 chid)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
ch->has_timedout = true;
|
gk20a_channel_set_timedout(ch);
|
||||||
g->ops.fifo.ch_abort_clean_up(ch);
|
g->ops.fifo.ch_abort_clean_up(ch);
|
||||||
gk20a_channel_put(ch);
|
gk20a_channel_put(ch);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user