diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c index a725cd6b0..35fb3023d 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ b/drivers/gpu/nvgpu/common/linux/channel.c @@ -720,7 +720,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, return -EINVAL; if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT | - NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) && + NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET | + NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE)) && !fence) return -EINVAL; @@ -757,6 +758,16 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, (g->can_railgate && !c->deterministic) || !skip_buffer_refcounting; + /* + * If user space adds the increments to the pushbuffer and does all + * job tracking itself, no kernel job tracking is needed here. + * User space must ensure that all prerequisites for fast submit are + * met; fail the submit with -EINVAL if that is not the case. + */ + if (need_job_tracking && + (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE)) + return -EINVAL; + if (need_job_tracking) { bool need_sync_framework = false; @@ -868,6 +879,15 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, goto clean_up; } + if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE) { + /* + * User space adds the increments in the pushbuffer; the kernel + * only updates the threshold by fence->value. NOTE(review): + * c->sync has no NULL check in this hunk - confirm it is set. + */ + c->sync->add_user_incrs(c->sync, fence->value); + } + if (need_job_tracking) { err = channel_gk20a_alloc_job(c, &job); if (err) diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index e965a329b..18d61faa2 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -301,6 +301,13 @@ static u64 gk20a_channel_syncpt_address(struct gk20a_channel_sync *s) return sp->syncpt_buf.gpu_va; } +static u32 gk20a_channel_add_user_incrs(struct gk20a_channel_sync *s, u32 val) +{ + struct 
gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + return nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost_dev, sp->id, val); +} + static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s) { struct gk20a_channel_syncpt *sp = @@ -353,6 +360,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c) sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline; sp->ops.syncpt_id = gk20a_channel_syncpt_id; sp->ops.syncpt_address = gk20a_channel_syncpt_address; + sp->ops.add_user_incrs = gk20a_channel_add_user_incrs; sp->ops.destroy = gk20a_channel_syncpt_destroy; return &sp->ops; @@ -878,6 +886,12 @@ static u64 gk20a_channel_semaphore_syncpt_address(struct gk20a_channel_sync *s) return 0; } +static u32 gk20a_channel_semaphore_add_user_incrs(struct gk20a_channel_sync *s, + u32 val) +{ + return 0; +} + static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s) { struct gk20a_channel_semaphore *sema = @@ -930,6 +944,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c) sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline; sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address; + sema->ops.add_user_incrs = gk20a_channel_semaphore_add_user_incrs; sema->ops.destroy = gk20a_channel_semaphore_destroy; return &sema->ops; diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index fe1d8526e..c80ebd380 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -105,6 +105,9 @@ struct gk20a_channel_sync { /* Returns the sync point address of sync point or 0 if not supported */ u64 (*syncpt_address)(struct gk20a_channel_sync *s); + /* Account for val increments user space added to the pushbuffer */ + u32 (*add_user_incrs)(struct gk20a_channel_sync *s, u32 val); + /* Free the resources allocated by gk20a_channel_sync_create. 
*/ void (*destroy)(struct gk20a_channel_sync *s); }; diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index cf75595a8..18168158b 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -1478,6 +1478,8 @@ struct nvgpu_fence { #define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5) /* expire current timeslice and reschedule runlist from front */ #define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST (1 << 6) +/* user space has added syncpoint increments in the pushbuffer */ +#define NVGPU_SUBMIT_GPFIFO_FLAGS_USER_FENCE_UPDATE (1 << 7) struct nvgpu_submit_gpfifo_args { __u64 gpfifo;