diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index f915f863e..b30e8535b 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -217,7 +217,10 @@ static void channel_kernelmode_deinit(struct nvgpu_channel *ch)
 #endif
 	(void) memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 
-	nvgpu_priv_cmdbuf_queue_free(ch);
+	if (ch->priv_cmd_q != NULL) {
+		nvgpu_priv_cmdbuf_queue_free(ch->priv_cmd_q);
+		ch->priv_cmd_q = NULL;
+	}
 
 	/* free pre-allocated resources, if applicable */
 	if (nvgpu_channel_is_prealloc_enabled(ch)) {
@@ -304,6 +307,7 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
 {
 	u32 gpfifo_size, gpfifo_entry_size;
 	u64 gpfifo_gpu_va;
+	u32 priv_cmd_jobs;
 	int err = 0;
 	struct gk20a *g = c->g;
@@ -375,7 +379,20 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
 		}
 	}
 
-	err = nvgpu_priv_cmdbuf_queue_alloc(c, args->num_inflight_jobs);
+	/*
+	 * Allocate priv cmdbuf space for pre and post fences. If the inflight
+	 * job count isn't specified, we base it on the gpfifo count. We
+	 * multiply by a factor of 1/3 because at most a third of the GPFIFO
+	 * entries can be used for user-submitted jobs; another third goes to
+	 * wait entries, and the final third to incr entries. There will be one
+	 * pair of acq and incr commands for each job.
+	 */
+	priv_cmd_jobs = args->num_inflight_jobs;
+	if (priv_cmd_jobs == 0U) {
+		priv_cmd_jobs = c->gpfifo.entry_num / 3U;
+	}
+
+	err = nvgpu_priv_cmdbuf_queue_alloc(c->vm, priv_cmd_jobs, &c->priv_cmd_q);
 	if (err != 0) {
 		goto clean_up_prealloc;
 	}
@@ -388,7 +405,8 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
 	return 0;
 
 clean_up_priv_cmd:
-	nvgpu_priv_cmdbuf_queue_free(c);
+	nvgpu_priv_cmdbuf_queue_free(c->priv_cmd_q);
+	c->priv_cmd_q = NULL;
 clean_up_prealloc:
 	if (nvgpu_channel_is_deterministic(c) &&
 			args->num_inflight_jobs != 0U) {
@@ -999,9 +1017,9 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
 	 * then incr_cmd i.e. order of allocation)
 	 */
 	if (job->wait_cmd != NULL) {
-		nvgpu_priv_cmdbuf_free(c, job->wait_cmd);
+		nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->wait_cmd);
 	}
-	nvgpu_priv_cmdbuf_free(c, job->incr_cmd);
+	nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->incr_cmd);
 
 	/*
 	 * ensure all pending writes complete before freeing up the job.
diff --git a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c
index 76ff5392a..c1c2704f3 100644
--- a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c
+++ b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c
@@ -27,7 +27,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -44,6 +44,7 @@ struct priv_cmd_entry {
 };
 
 struct priv_cmd_queue {
+	struct vm_gk20a *vm;
 	struct nvgpu_mem mem; /* pushbuf */
 	u32 size; /* allocated length in words */
 	u32 put; /* next entry will begin here */
@@ -58,15 +59,15 @@ struct priv_cmd_queue {
 
 /* allocate private cmd buffer queue.
    used for inserting commands before/after user submitted buffers.
 */
-int nvgpu_priv_cmdbuf_queue_alloc(struct nvgpu_channel *ch,
-		u32 num_in_flight)
+int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
+		u32 job_count, struct priv_cmd_queue **queue)
 {
-	struct gk20a *g = ch->g;
-	struct vm_gk20a *ch_vm = ch->vm;
+	struct gk20a *g = vm->mm->g;
 	struct priv_cmd_queue *q;
 	u64 size, tmp_size;
 	int err = 0;
 	u32 wait_size, incr_size;
+	u32 mem_per_job;
 
 	/*
 	 * sema size is at least as much as syncpt size, but semas may not be
@@ -96,28 +97,18 @@ int nvgpu_priv_cmdbuf_queue_alloc(struct nvgpu_channel *ch,
 	 * another 2 words. In reality these numbers vary by chip but we'll use
 	 * 8 and 10 as examples.
 	 *
-	 * We have two cases to consider: the first is we base the size of the
-	 * queue on the gpfifo count. Here we multiply by a factor of 1/3
-	 * because at most a third of the GPFIFO entries can be used for
-	 * user-submitted jobs; another third goes to wait entries, and the
-	 * final third to incr entries. There will be one pair of acq and incr
-	 * commands for each job.
-	 *
-	 * gpfifo entry num * (1 / 3) * (8 + 10) * 4 bytes
-	 *
-	 * If instead num_in_flight is specified then we will use that to size
-	 * the queue instead of a third of the gpfifo entry count. The worst
-	 * case is still both sync commands (one ACQ and one INCR) per submit so
-	 * we have a queue size of:
+	 * Given the job count, cmdbuf space is allocated such that each job
+	 * can get one wait command and one increment command:
 	 *
 	 *   num_in_flight * (8 + 10) * 4 bytes
+	 *
+	 * These cmdbufs are inserted as gpfifo entries right before and after
+	 * the user submitted gpfifo entries per submit.
 	 */
-	if (num_in_flight == 0U) {
-		/* round down to ensure space for all priv cmds */
-		num_in_flight = ch->gpfifo.entry_num / 3U;
-	}
-
-	size = num_in_flight * (wait_size + incr_size) * sizeof(u32);
+	mem_per_job = nvgpu_safe_mult_u32(nvgpu_safe_add_u32(wait_size,
+			incr_size), (u32)sizeof(u32));
+	/* both 32 bit and mem_per_job is small */
+	size = nvgpu_safe_mult_u64((u64)job_count, (u64)mem_per_job);
 
 	tmp_size = PAGE_ALIGN(roundup_pow_of_two(size));
 	if (tmp_size > U32_MAX) {
@@ -130,12 +121,14 @@ int nvgpu_priv_cmdbuf_queue_alloc(struct nvgpu_channel *ch,
 		return -ENOMEM;
 	}
 
-	if (num_in_flight > U32_MAX / 2U) {
+	q->vm = vm;
+
+	if (job_count > U32_MAX / 2U) {
 		err = -ERANGE;
 		goto err_free_queue;
 	}
 
-	q->entries_len = 2U * num_in_flight;
+	q->entries_len = 2U * job_count;
 	q->entries = nvgpu_vzalloc(g,
 			nvgpu_safe_mult_u64((u64)q->entries_len,
 				sizeof(*q->entries)));
@@ -144,7 +137,7 @@
 		goto err_free_queue;
 	}
 
-	err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem);
+	err = nvgpu_dma_alloc_map_sys(vm, size, &q->mem);
 	if (err != 0) {
 		nvgpu_err(g, "%s: memory allocation failed", __func__);
 		goto err_free_entries;
@@ -154,8 +147,7 @@
 	nvgpu_assert(tmp_size <= U32_MAX);
 	q->size = (u32)tmp_size;
 
-	ch->priv_cmd_q = q;
-
+	*queue = q;
 	return 0;
 err_free_entries:
 	nvgpu_vfree(g, q->entries);
@@ -164,32 +156,25 @@ err_free_queue:
 	return err;
 }
 
-void nvgpu_priv_cmdbuf_queue_free(struct nvgpu_channel *ch)
+void nvgpu_priv_cmdbuf_queue_free(struct priv_cmd_queue *q)
 {
-	struct vm_gk20a *ch_vm = ch->vm;
-	struct priv_cmd_queue *q = ch->priv_cmd_q;
-	struct gk20a *g = ch->g;
+	struct vm_gk20a *vm = q->vm;
+	struct gk20a *g = vm->mm->g;
 
-	if (q == NULL) {
-		return;
-	}
-
-	nvgpu_dma_unmap_free(ch_vm, &q->mem);
+	nvgpu_dma_unmap_free(vm, &q->mem);
 	nvgpu_vfree(g, q->entries);
 	nvgpu_kfree(g, q);
-
-	ch->priv_cmd_q = NULL;
 }
 
 /* allocate a cmd buffer with given size. size is number of u32 entries */
-static int nvgpu_priv_cmdbuf_alloc_buf(struct nvgpu_channel *c, u32 orig_size,
+static int nvgpu_priv_cmdbuf_alloc_buf(struct priv_cmd_queue *q, u32 orig_size,
 		struct priv_cmd_entry *e)
 {
-	struct priv_cmd_queue *q = c->priv_cmd_q;
+	struct gk20a *g = q->vm->mm->g;
 	u32 size = orig_size;
 	u32 free_count;
 
-	nvgpu_log_fn(c->g, "size %d", orig_size);
+	nvgpu_log_fn(g, "size %d", orig_size);
 
 	/*
 	 * If free space in the end is less than requested, increase the size
@@ -206,8 +191,8 @@ static int nvgpu_priv_cmdbuf_alloc_buf(struct nvgpu_channel *c, u32 orig_size,
 		size = orig_size + (q->size - q->put);
 	}
 
-	nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d",
-			c->chid, q->get, q->put);
+	nvgpu_log_info(g, "priv cmd queue get:put %d:%d",
+			q->get, q->put);
 
 	nvgpu_assert(q->put < q->size);
 	nvgpu_assert(q->get < q->size);
@@ -250,15 +235,14 @@ static int nvgpu_priv_cmdbuf_alloc_buf(struct nvgpu_channel *c, u32 orig_size,
 	nvgpu_smp_wmb();
 
 	e->valid = true;
-	nvgpu_log_fn(c->g, "done");
+	nvgpu_log_fn(g, "done");
 
 	return 0;
 }
 
-int nvgpu_priv_cmdbuf_alloc(struct nvgpu_channel *c, u32 size,
+int nvgpu_priv_cmdbuf_alloc(struct priv_cmd_queue *q, u32 size,
 		struct priv_cmd_entry **e)
 {
-	struct priv_cmd_queue *q = c->priv_cmd_q;
 	u32 next_put = nvgpu_safe_add_u32(q->entry_put, 1U) % q->entries_len;
 	struct priv_cmd_entry *entry;
 	int err;
@@ -268,7 +252,7 @@ int nvgpu_priv_cmdbuf_alloc(struct nvgpu_channel *c, u32 size,
 	}
 
 	entry = &q->entries[q->entry_put];
-	err = nvgpu_priv_cmdbuf_alloc_buf(c, size, entry);
+	err = nvgpu_priv_cmdbuf_alloc_buf(q, size, entry);
 	if (err != 0) {
 		return err;
 	}
@@ -279,11 +263,9 @@ int nvgpu_priv_cmdbuf_alloc(struct nvgpu_channel *c, u32 size,
 	return 0;
 }
 
-void nvgpu_priv_cmdbuf_rollback(struct nvgpu_channel *ch,
+void nvgpu_priv_cmdbuf_rollback(struct priv_cmd_queue *q,
 		struct priv_cmd_entry *e)
 {
-	struct priv_cmd_queue *q = ch->priv_cmd_q;
-
 	nvgpu_assert(q->put < q->size);
 	nvgpu_assert(q->size > 0U);
 	nvgpu_assert(e->alloc_size <= q->size);
@@ -297,18 +279,15 @@ void nvgpu_priv_cmdbuf_rollback(struct nvgpu_channel *ch,
 			% q->entries_len;
 }
 
-void nvgpu_priv_cmdbuf_free(struct nvgpu_channel *ch,
-		struct priv_cmd_entry *e)
+void nvgpu_priv_cmdbuf_free(struct priv_cmd_queue *q, struct priv_cmd_entry *e)
 {
-	struct priv_cmd_queue *q = ch->priv_cmd_q;
-	struct gk20a *g = ch->g;
+	struct gk20a *g = q->vm->mm->g;
 
 	if (e->valid) {
 		/* read the entry's valid flag before reading its contents */
 		nvgpu_smp_rmb();
 		if ((q->get != e->off) && e->off != 0U) {
-			nvgpu_err(g, "requests out-of-order, ch=%d",
-					ch->chid);
+			nvgpu_err(g, "priv cmdbuf requests out-of-order");
 		}
 		nvgpu_assert(q->size > 0U);
 		q->get = nvgpu_safe_add_u32(e->off, e->size) & (q->size - 1U);
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 70dfb7366..9122ee3a2 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -159,7 +159,7 @@ clean_up_post_fence:
 	job->post_fence = NULL;
 clean_up_wait_cmd:
 	if (job->wait_cmd != NULL) {
-		nvgpu_priv_cmdbuf_rollback(c, job->wait_cmd);
+		nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
 	}
 	job->wait_cmd = NULL;
 clean_up_unlock:
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
index aea38adb1..3aa724199 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
@@ -148,7 +148,7 @@ static int channel_sync_semaphore_wait_fd(
 	}
 
 	wait_cmd_size = c->g->ops.sync.sema.get_wait_cmd_size();
-	err = nvgpu_priv_cmdbuf_alloc(c,
+	err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q,
 			wait_cmd_size * num_fences, entry);
 	if (err != 0) {
 		goto cleanup;
@@ -188,7 +188,7 @@ static int channel_sync_semaphore_incr_common(
 	}
 
 	incr_cmd_size = c->g->ops.sync.sema.get_incr_cmd_size();
-	err = nvgpu_priv_cmdbuf_alloc(c, incr_cmd_size, incr_cmd);
+	err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q, incr_cmd_size, incr_cmd);
 	if (err != 0) {
 		goto clean_up_sema;
 	}
@@ -218,7 +218,7 @@ clean_up_os_fence:
 		os_fence.ops->drop_ref(&os_fence);
 	}
 clean_up_cmdbuf:
-	nvgpu_priv_cmdbuf_rollback(c, *incr_cmd);
+	nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, *incr_cmd);
 clean_up_sema:
 	nvgpu_semaphore_put(semaphore);
 	return err;
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
index 4a9f42c43..29a72a130 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
@@ -77,7 +77,7 @@ static int channel_sync_syncpt_wait_raw(struct nvgpu_channel_sync_syncpt *s,
 		return -EINVAL;
 	}
 
-	err = nvgpu_priv_cmdbuf_alloc(c,
+	err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q,
 			c->g->ops.sync.syncpt.get_wait_cmd_size(),
 			wait_cmd);
 	if (err != 0) {
@@ -135,7 +135,7 @@ static int channel_sync_syncpt_wait_fd(struct nvgpu_channel_sync *s, int fd,
 	}
 
 	wait_cmd_size = c->g->ops.sync.syncpt.get_wait_cmd_size();
-	err = nvgpu_priv_cmdbuf_alloc(c,
+	err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q,
 			wait_cmd_size * num_fences, wait_cmd);
 	if (err != 0) {
 		goto cleanup;
@@ -177,7 +177,7 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
 	struct nvgpu_channel *c = sp->c;
 	struct nvgpu_os_fence os_fence = {0};
 
-	err = nvgpu_priv_cmdbuf_alloc(c,
+	err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q,
 			c->g->ops.sync.syncpt.get_incr_cmd_size(wfi_cmd),
 			incr_cmd);
 	if (err != 0) {
@@ -241,7 +241,7 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
 	return 0;
 
 clean_up_priv_cmd:
-	nvgpu_priv_cmdbuf_rollback(c, *incr_cmd);
+	nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, *incr_cmd);
 	return err;
 }
diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h
index b0bfe4fd4..b1260b748 100644
--- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h
@@ -27,6 +27,7 @@
 
 struct gk20a;
 struct priv_cmd_entry;
 struct nvgpu_mem;
+struct nvgpu_channel;
 
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
diff --git a/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h b/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h
index 33fe6e4de..faa271681 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h
@@ -26,17 +26,19 @@
 #include 
 
 struct gk20a;
-struct nvgpu_channel;
+struct vm_gk20a;
 struct priv_cmd_entry;
+struct priv_cmd_queue;
 
-int nvgpu_priv_cmdbuf_queue_alloc(struct nvgpu_channel *ch, u32 num_in_flight);
-void nvgpu_priv_cmdbuf_queue_free(struct nvgpu_channel *ch);
+int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
+		u32 job_count, struct priv_cmd_queue **queue);
+void nvgpu_priv_cmdbuf_queue_free(struct priv_cmd_queue *q);
 
-int nvgpu_priv_cmdbuf_alloc(struct nvgpu_channel *c, u32 size,
+int nvgpu_priv_cmdbuf_alloc(struct priv_cmd_queue *q, u32 size,
 		struct priv_cmd_entry **e);
-void nvgpu_priv_cmdbuf_rollback(struct nvgpu_channel *ch,
+void nvgpu_priv_cmdbuf_rollback(struct priv_cmd_queue *q,
 		struct priv_cmd_entry *e);
-void nvgpu_priv_cmdbuf_free(struct nvgpu_channel *ch,
+void nvgpu_priv_cmdbuf_free(struct priv_cmd_queue *q,
 		struct priv_cmd_entry *e);
 
 void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e,
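
After the refactor, the priv cmdbuf API is keyed by an explicit priv_cmd_queue tied to a VM rather than by a channel, so the queue can in principle be exercised without any channel state. A minimal sketch of the resulting calling convention, using only the signatures declared in the updated priv_cmdbuf.h above (priv_cmdbuf_smoke_test and its vm argument are hypothetical, the job count of 128 is arbitrary, and error handling is trimmed):

    #include <nvgpu/priv_cmdbuf.h>

    static int priv_cmdbuf_smoke_test(struct vm_gk20a *vm)
    {
            struct priv_cmd_queue *q = NULL;
            struct priv_cmd_entry *e = NULL;
            int err;

            /* space for 128 jobs, each with one wait and one incr entry */
            err = nvgpu_priv_cmdbuf_queue_alloc(vm, 128U, &q);
            if (err != 0) {
                    return err;
            }

            /* reserve a 16-word entry, then undo the reservation, the
             * same pattern the sync code above uses on its error paths */
            err = nvgpu_priv_cmdbuf_alloc(q, 16U, &e);
            if (err == 0) {
                    nvgpu_priv_cmdbuf_rollback(q, e);
            }

            nvgpu_priv_cmdbuf_queue_free(q);
            return err;
    }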
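
The sizing comments in channel_setup_kernelmode() and nvgpu_priv_cmdbuf_queue_alloc() become concrete with hypothetical numbers: take the 8-word wait and 10-word incr command sizes that the comment itself uses only as examples, a 256-entry gpfifo, and num_inflight_jobs left at zero:

    /*
     * Worked example (hypothetical values; real wait/incr sizes vary by chip):
     *
     *   priv_cmd_jobs = 256 / 3          =   85 jobs
     *   mem_per_job   = (8 + 10) * 4     =   72 bytes
     *   size          = 85 * 72          = 6120 bytes
     *   tmp_size      = PAGE_ALIGN(roundup_pow_of_two(6120))
     *                 = PAGE_ALIGN(8192) = 8192 bytes (4 KiB pages)
     *
     * q->entries_len is then 2 * 85 = 170: one bookkeeping slot for the
     * wait entry and one for the incr entry of each job.
     */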
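
The ring-buffer arithmetic in nvgpu_priv_cmdbuf_alloc_buf() and nvgpu_priv_cmdbuf_free() also reads more easily with numbers plugged in; rounding the buffer up to a power of two is what lets the get pointer advance with a mask instead of a modulo. A sketch with hypothetical values:

    /*
     * Wrap-around sketch (hypothetical values, in words): with
     * q->size = 2048 and q->put = 2040, a request of orig_size = 16 does
     * not fit in the 8 words left at the end, so the allocation is padded
     * to size = 16 + (2048 - 2040) = 24 and the payload starts back at
     * offset 0. This is also why nvgpu_priv_cmdbuf_free() tolerates
     * q->get != e->off when e->off == 0: the entry may follow a wrap.
     *
     * On free, the get pointer advances through the mask:
     *
     *   q->get = (e->off + e->size) & (q->size - 1U);
     *
     * which is equivalent to "% 2048" because q->size is a power of two.
     */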