diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index 0ae9e38d1..e153982c3 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -80,7 +80,7 @@ static int nvgpu_submit_create_wait_cmd(struct nvgpu_channel *c, static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c, struct priv_cmd_entry **incr_cmd, struct nvgpu_fence_type **post_fence, bool flag_fence_get, - bool need_wfi, bool need_sync_fence, bool register_irq) + bool need_wfi, bool need_sync_fence) { int err; @@ -91,11 +91,10 @@ static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c, if (flag_fence_get) { err = nvgpu_channel_sync_incr_user(c->sync, incr_cmd, - *post_fence, need_wfi, need_sync_fence, - register_irq); + *post_fence, need_wfi, need_sync_fence); } else { err = nvgpu_channel_sync_incr(c->sync, incr_cmd, - *post_fence, need_sync_fence, register_irq); + *post_fence, need_sync_fence); } if (err != 0) { @@ -112,7 +111,7 @@ static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c, static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, struct nvgpu_channel_fence *fence, struct nvgpu_channel_job *job, - bool register_irq, u32 flags) + u32 flags) { struct gk20a *g = c->g; bool need_sync_fence; @@ -163,8 +162,7 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, * if not requested by user. */ err = nvgpu_submit_create_incr_cmd(c, &job->incr_cmd, &job->post_fence, - flag_fence_get, need_wfi, need_sync_fence, - register_irq); + flag_fence_get, need_wfi, need_sync_fence); if (err != 0) { goto clean_up_wait_cmd; } @@ -354,8 +352,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c, return err; } - err = nvgpu_submit_prepare_syncs(c, fence, job, need_deferred_cleanup, - flags); + err = nvgpu_submit_prepare_syncs(c, fence, job, flags); if (err != 0) { goto clean_up_job; } @@ -383,6 +380,8 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c, goto clean_up_gpfifo_incr; } + nvgpu_channel_sync_mark_progress(c->sync, need_deferred_cleanup); + if (fence_out != NULL) { *fence_out = nvgpu_fence_get(job->post_fence); } diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore.c b/drivers/gpu/nvgpu/common/semaphore/semaphore.c index 27383650a..e0b349c5a 100644 --- a/drivers/gpu/nvgpu/common/semaphore/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore.c @@ -155,7 +155,8 @@ bool nvgpu_semaphore_can_wait(struct nvgpu_semaphore *s) void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, struct nvgpu_hw_semaphore *hw_sema) { - int next = nvgpu_hw_semaphore_update_next(hw_sema); + /* One submission increments the next value by one. */ + int next = nvgpu_hw_semaphore_read_next(hw_sema) + 1; /* "s" should be an uninitialized sema. */ WARN_ON(s->ready_to_wait); @@ -163,7 +164,7 @@ void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, nvgpu_atomic_set(&s->value, next); s->ready_to_wait = true; - gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)", + gpu_sema_verbose_dbg(s->g, "PREP sema for c=%d (%u)", hw_sema->chid, next); } diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c index 9d8a299a1..fed3e06c9 100644 --- a/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c @@ -158,5 +158,11 @@ int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema) int nvgpu_hw_semaphore_update_next(struct nvgpu_hw_semaphore *hw_sema) { - return nvgpu_atomic_add_return(1, &hw_sema->next_value); + int next = nvgpu_atomic_add_return(1, &hw_sema->next_value); + struct nvgpu_semaphore_pool *p = hw_sema->location.pool; + struct gk20a *g = p->sema_sea->gk20a; + + gpu_sema_verbose_dbg(g, "INCR sema for c=%d (%u)", + hw_sema->chid, next); + return next; } diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync.c b/drivers/gpu/nvgpu/common/sync/channel_sync.c index 0d49052d1..205b5e02b 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync.c @@ -65,17 +65,22 @@ int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd, int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool need_sync_fence, bool register_irq) + bool need_sync_fence) { - return s->ops->incr(s, entry, fence, need_sync_fence, register_irq); + return s->ops->incr(s, entry, fence, need_sync_fence); } int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool wfi, bool need_sync_fence, bool register_irq) + bool wfi, bool need_sync_fence) { - return s->ops->incr_user(s, entry, fence, wfi, need_sync_fence, - register_irq); + return s->ops->incr_user(s, entry, fence, wfi, need_sync_fence); +} + +void nvgpu_channel_sync_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq) +{ + s->ops->mark_progress(s, register_irq); } void nvgpu_channel_sync_set_min_eq_max(struct nvgpu_channel_sync *s) diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h index 77966bcdb..4916b5ba5 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h @@ -62,15 +62,16 @@ struct nvgpu_channel_sync_ops { int (*incr)(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool need_sync_fence, - bool register_irq); + bool need_sync_fence); int (*incr_user)(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, bool wfi, - bool need_sync_fence, - bool register_irq); + bool need_sync_fence); + + void (*mark_progress)(struct nvgpu_channel_sync *s, + bool register_irq); void (*set_min_eq_max)(struct nvgpu_channel_sync *s); diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c index c236e1766..acfab9fa3 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c @@ -100,7 +100,7 @@ static void add_sema_incr_cmd(struct gk20a *g, struct nvgpu_channel *c, /* release will need to write back to the semaphore memory. */ va = nvgpu_semaphore_gpu_rw_va(s); - /* incr the underlying sema next_value (like syncpt's max). */ + /* find the right sema next_value to write (like syncpt's max). */ nvgpu_semaphore_prepare(s, hw_sema); g->ops.sync.sema.add_incr_cmd(g, cmd, s, va, wfi); @@ -237,8 +237,7 @@ static int channel_sync_semaphore_incr( struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool need_sync_fence, - bool register_irq) + bool need_sync_fence) { /* Don't put wfi cmd to this one since we're not returning * a fence to user space. */ @@ -252,8 +251,7 @@ static int channel_sync_semaphore_incr_user( struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, bool wfi, - bool need_sync_fence, - bool register_irq) + bool need_sync_fence) { #ifndef CONFIG_NVGPU_SYNCFD_NONE int err; @@ -275,6 +273,19 @@ static int channel_sync_semaphore_incr_user( #endif } +static void channel_sync_semaphore_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq) +{ + struct nvgpu_channel_sync_semaphore *sp = + nvgpu_channel_sync_semaphore_from_base(s); + + (void)nvgpu_hw_semaphore_update_next(sp->hw_sema); + /* + * register_irq is ignored: there is only one semaphore interrupt that + * triggers nvgpu_channel_update() and it's always active. + */ +} + static void channel_sync_semaphore_set_min_eq_max(struct nvgpu_channel_sync *s) { struct nvgpu_channel_sync_semaphore *sp = @@ -310,6 +321,7 @@ static const struct nvgpu_channel_sync_ops channel_sync_semaphore_ops = { .wait_fence_fd = channel_sync_semaphore_wait_fd, .incr = channel_sync_semaphore_incr, .incr_user = channel_sync_semaphore_incr_user, + .mark_progress = channel_sync_semaphore_mark_progress, .set_min_eq_max = channel_sync_semaphore_set_min_eq_max, .destroy = channel_sync_semaphore_destroy, }; diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c index 29a72a130..50ee638f5 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c @@ -159,13 +159,12 @@ static void channel_sync_syncpt_update(void *priv, int nr_completed) nvgpu_channel_update(ch); - /* note: channel_get() is in channel_sync_syncpt_incr_common() */ + /* note: channel_get() is in channel_sync_syncpt_mark_progress() */ nvgpu_channel_put(ch); } static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s, bool wfi_cmd, - bool register_irq, struct priv_cmd_entry **incr_cmd, struct nvgpu_fence_type *fence, bool need_sync_fence) @@ -189,36 +188,10 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s, c->g->ops.sync.syncpt.add_incr_cmd(c->g, *incr_cmd, sp->id, sp->syncpt_buf.gpu_va, wfi_cmd); - thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id, + thresh = nvgpu_wrapping_add_u32( + nvgpu_nvhost_syncpt_read_maxval(sp->nvhost, sp->id), c->g->ops.sync.syncpt.get_incr_per_release()); - if (register_irq) { - struct nvgpu_channel *referenced = nvgpu_channel_get(c); - - WARN_ON(!referenced); - - if (referenced) { - /* note: channel_put() is in - * channel_sync_syncpt_update() */ - - err = nvgpu_nvhost_intr_register_notifier( - sp->nvhost, - sp->id, thresh, - channel_sync_syncpt_update, c); - if (err != 0) { - nvgpu_channel_put(referenced); - } - - /* Adding interrupt action should - * never fail. A proper error handling - * here would require us to decrement - * the syncpt max back to its original - * value. */ - WARN(err, - "failed to set submit complete interrupt"); - } - } - if (need_sync_fence) { err = nvgpu_os_fence_syncpt_create(&os_fence, c, sp->nvhost, sp->id, thresh); @@ -248,30 +221,69 @@ clean_up_priv_cmd: static int channel_sync_syncpt_incr(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool need_sync_fence, - bool register_irq) + bool need_sync_fence) { /* Don't put wfi cmd to this one since we're not returning * a fence to user space. */ - return channel_sync_syncpt_incr_common(s, - false /* no wfi */, - register_irq /* register irq */, - entry, fence, need_sync_fence); + return channel_sync_syncpt_incr_common(s, false, entry, fence, + need_sync_fence); } static int channel_sync_syncpt_incr_user(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, bool wfi, - bool need_sync_fence, - bool register_irq) + bool need_sync_fence) { /* Need to do 'wfi + host incr' since we return the fence * to user space. */ - return channel_sync_syncpt_incr_common(s, - wfi, - register_irq /* register irq */, - entry, fence, need_sync_fence); + return channel_sync_syncpt_incr_common(s, wfi, entry, fence, + need_sync_fence); +} + +static void channel_sync_syncpt_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq) +{ + struct nvgpu_channel_sync_syncpt *sp = + nvgpu_channel_sync_syncpt_from_base(s); + struct nvgpu_channel *c = sp->c; + struct gk20a *g = c->g; + u32 thresh; + + thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id, + g->ops.sync.syncpt.get_incr_per_release()); + + if (register_irq) { + struct nvgpu_channel *referenced = nvgpu_channel_get(c); + + WARN_ON(referenced == NULL); + + if (referenced != NULL) { + /* + * note: the matching channel_put() is in + * channel_sync_syncpt_update() that gets called when + * the job completes. + */ + + int err = nvgpu_nvhost_intr_register_notifier( + sp->nvhost, + sp->id, thresh, + channel_sync_syncpt_update, c); + if (err != 0) { + nvgpu_channel_put(referenced); + } + + /* + * This never fails in practice. If it does, we won't + * be getting a completion signal to free the job + * resources, but maybe this succeeds on a possible + * subsequent submit, and the channel closure path will + * eventually mark everything completed anyway. + */ + WARN(err != 0, + "failed to set submit complete interrupt"); + } + } } int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s, @@ -314,6 +326,7 @@ static const struct nvgpu_channel_sync_ops channel_sync_syncpt_ops = { .wait_fence_fd = channel_sync_syncpt_wait_fd, .incr = channel_sync_syncpt_incr, .incr_user = channel_sync_syncpt_incr_user, + .mark_progress = channel_sync_syncpt_mark_progress, .set_min_eq_max = channel_sync_syncpt_set_min_eq_max, .destroy = channel_sync_syncpt_destroy, }; @@ -348,8 +361,8 @@ nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c) snprintf(syncpt_name, sizeof(syncpt_name), "%s_%d", c->g->name, c->chid); - sp->id = nvgpu_nvhost_get_syncpt_host_managed(sp->nvhost, - c->chid, syncpt_name); + sp->id = nvgpu_nvhost_get_syncpt_client_managed(sp->nvhost, + syncpt_name); /** * This is a WAR to handle invalid value of a syncpt. diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h index e19d36e39..067079614 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h @@ -53,7 +53,7 @@ int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd, */ int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool need_sync_fence, bool register_irq); + bool need_sync_fence); /* * Increment syncpoint/semaphore, so that the returned fence represents @@ -65,7 +65,19 @@ int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s, */ int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool wfi, bool need_sync_fence, bool register_irq); + bool wfi, bool need_sync_fence); + +/* + * Tell the sync that some progress will eventually happen on it: increase the + * tracked max value of the underlying syncpoint/semaphore and maybe register + * an interrupt notifier to be called if needed so that the channel gets a + * job completion signal. + * + * @param register_irq [in] Register an interrupt for the increment. + */ +void nvgpu_channel_sync_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq); + /* * Reset the channel syncpoint/semaphore. Syncpoint increments generally * wrap around the range of integer values. Current max value encompasses diff --git a/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h b/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h index 39817cd3e..fe705d09d 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h +++ b/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h @@ -85,4 +85,7 @@ int nvgpu_nvhost_syncpt_wait_timeout_ext( int nvgpu_nvhost_syncpt_read_ext_check( struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val); +u32 nvgpu_nvhost_syncpt_read_maxval( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id); + #endif diff --git a/drivers/gpu/nvgpu/os/posix/posix-nvhost.c b/drivers/gpu/nvgpu/os/posix/posix-nvhost.c index 9375b0ca3..3500df743 100644 --- a/drivers/gpu/nvgpu/os/posix/posix-nvhost.c +++ b/drivers/gpu/nvgpu/os/posix/posix-nvhost.c @@ -178,6 +178,12 @@ int nvgpu_nvhost_syncpt_read_ext_check( return -ENOSYS; } +u32 nvgpu_nvhost_syncpt_read_maxval( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + return 0U; +} + int nvgpu_nvhost_syncpt_wait_timeout_ext( struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh, u32 timeout, u32 waiter_index)