diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index da8dade64..f915f863e 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -217,7 +217,7 @@ static void channel_kernelmode_deinit(struct nvgpu_channel *ch)
 #endif
         (void) memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 
-        nvgpu_free_priv_cmdbuf_queue(ch);
+        nvgpu_priv_cmdbuf_queue_free(ch);
 
         /* free pre-allocated resources, if applicable */
         if (nvgpu_channel_is_prealloc_enabled(ch)) {
@@ -375,7 +375,7 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
                 }
         }
 
-        err = nvgpu_alloc_priv_cmdbuf_queue(c, args->num_inflight_jobs);
+        err = nvgpu_priv_cmdbuf_queue_alloc(c, args->num_inflight_jobs);
         if (err != 0) {
                 goto clean_up_prealloc;
         }
@@ -388,7 +388,7 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
         return 0;
 
 clean_up_priv_cmd:
-        nvgpu_free_priv_cmdbuf_queue(c);
+        nvgpu_priv_cmdbuf_queue_free(c);
 clean_up_prealloc:
         if (nvgpu_channel_is_deterministic(c) && args->num_inflight_jobs != 0U) {
@@ -998,10 +998,10 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
                  * Free the private command buffers (wait_cmd first and
                  * then incr_cmd i.e. order of allocation)
                  */
-                nvgpu_channel_update_priv_cmd_q_and_free_entry(c,
-                        job->wait_cmd);
-                nvgpu_channel_update_priv_cmd_q_and_free_entry(c,
-                        job->incr_cmd);
+                if (job->wait_cmd != NULL) {
+                        nvgpu_priv_cmdbuf_free(c, job->wait_cmd);
+                }
+                nvgpu_priv_cmdbuf_free(c, job->incr_cmd);
 
                 /*
                  * ensure all pending writes complete before freeing up the job.
diff --git a/drivers/gpu/nvgpu/common/fifo/job.c b/drivers/gpu/nvgpu/common/fifo/job.c
index 8a7d1aae6..0f5fe56d9 100644
--- a/drivers/gpu/nvgpu/common/fifo/job.c
+++ b/drivers/gpu/nvgpu/common/fifo/job.c
@@ -76,17 +76,8 @@ int nvgpu_channel_alloc_job(struct nvgpu_channel *c,
 void nvgpu_channel_free_job(struct nvgpu_channel *c,
                 struct nvgpu_channel_job *job)
 {
-        /*
-         * In case of pre_allocated jobs, we need to clean out
-         * the job but maintain the pointers to the priv_cmd_entry,
-         * since they're inherently tied to the job node.
-         */
         if (nvgpu_channel_is_prealloc_enabled(c)) {
-                struct priv_cmd_entry *wait_cmd = job->wait_cmd;
-                struct priv_cmd_entry *incr_cmd = job->incr_cmd;
                 (void) memset(job, 0, sizeof(*job));
-                job->wait_cmd = wait_cmd;
-                job->incr_cmd = incr_cmd;
         } else {
                 nvgpu_kfree(c->g, job);
         }
@@ -168,10 +159,8 @@ bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c)
 int channel_prealloc_resources(struct nvgpu_channel *ch, u32 num_jobs)
 {
 #ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
-        unsigned int i;
         int err;
         size_t size;
-        struct priv_cmd_entry *entries = NULL;
         if ((nvgpu_channel_is_prealloc_enabled(ch)) || (num_jobs == 0U)) {
                 return -EINVAL;
         }
@@ -192,32 +181,10 @@ int channel_prealloc_resources(struct nvgpu_channel *ch, u32 num_jobs)
                 goto clean_up;
         }
 
-        /*
-         * pre-allocate 2x priv_cmd_entry for each job up front.
-         * since vmalloc take in an unsigned long, we need
-         * to make sure we don't hit an overflow condition
-         */
-        size = sizeof(struct priv_cmd_entry);
-        if (num_jobs <= U32_MAX / (size << 1U)) {
-                entries = nvgpu_vzalloc(ch->g,
-                                ((unsigned long)num_jobs << 1UL) *
-                                (unsigned long)size);
-        }
-        if (entries == NULL) {
-                err = -ENOMEM;
-                goto clean_up_joblist;
-        }
-
-        for (i = 0; i < num_jobs; i++) {
-                ch->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i];
-                ch->joblist.pre_alloc.jobs[i].incr_cmd =
-                                &entries[i + num_jobs];
-        }
-
         /* pre-allocate a fence pool */
         err = nvgpu_fence_pool_alloc(ch, num_jobs);
         if (err != 0) {
-                goto clean_up_priv_cmd;
+                goto clean_up;
         }
 
         ch->joblist.pre_alloc.length = num_jobs;
@@ -234,11 +201,8 @@ int channel_prealloc_resources(struct nvgpu_channel *ch, u32 num_jobs)
 
         return 0;
 
-clean_up_priv_cmd:
-        nvgpu_vfree(ch->g, entries);
-clean_up_joblist:
-        nvgpu_vfree(ch->g, ch->joblist.pre_alloc.jobs);
 clean_up:
+        nvgpu_vfree(ch->g, ch->joblist.pre_alloc.jobs);
         (void) memset(&ch->joblist.pre_alloc, 0, sizeof(ch->joblist.pre_alloc));
         return err;
 #else
diff --git a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c
index 5555c6b9e..b7eea852e 100644
--- a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c
+++ b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c
@@ -31,17 +31,24 @@
 #include
 #include
 #include
+#include
 
 struct priv_cmd_queue {
-        struct nvgpu_mem mem;
-        u32 size;       /* num of entries in words */
-        u32 put;        /* put for priv cmd queue */
-        u32 get;        /* get for priv cmd queue */
+        struct nvgpu_mem mem; /* pushbuf */
+        u32 size; /* allocated length in words */
+        u32 put; /* next entry will begin here */
+        u32 get; /* next entry to free begins here */
+
+        /* an entry is a fragment of the pushbuf memory */
+        struct priv_cmd_entry *entries;
+        u32 entries_len; /* allocated length */
+        u32 entry_put;
+        u32 entry_get;
 };
 
 /* allocate private cmd buffer queue.
    used for inserting commands before/after user submitted buffers. */
-int nvgpu_alloc_priv_cmdbuf_queue(struct nvgpu_channel *ch,
+int nvgpu_priv_cmdbuf_queue_alloc(struct nvgpu_channel *ch,
                 u32 num_in_flight)
 {
         struct gk20a *g = ch->g;
@@ -69,7 +76,9 @@ int nvgpu_alloc_priv_cmdbuf_queue(struct nvgpu_channel *ch,
          * Compute the amount of priv_cmdbuf space we need. In general the
          * worst case is the kernel inserts both a semaphore pre-fence and
          * post-fence. Any sync-pt fences will take less memory so we can
-         * ignore them unless they're the only supported type.
+         * ignore them unless they're the only supported type. Jobs can also
+         * have more than one pre-fence but that's abnormal and we'll -EAGAIN
+         * if such jobs would fill the queue.
          *
          * A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
         * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be
@@ -95,21 +104,40 @@
          */
         if (num_in_flight == 0U) {
                 /* round down to ensure space for all priv cmds */
-                num_in_flight = ch->gpfifo.entry_num / 3;
+                num_in_flight = ch->gpfifo.entry_num / 3U;
         }
 
         size = num_in_flight * (wait_size + incr_size) * sizeof(u32);
         tmp_size = PAGE_ALIGN(roundup_pow_of_two(size));
-        nvgpu_assert(tmp_size <= U32_MAX);
+        if (tmp_size > U32_MAX) {
+                return -ERANGE;
+        }
         size = (u32)tmp_size;
 
         q = nvgpu_kzalloc(g, sizeof(*q));
+        if (q == NULL) {
+                return -ENOMEM;
+        }
+
+        if (num_in_flight > U32_MAX / 2U) {
+                err = -ERANGE;
+                goto err_free_queue;
+        }
+
+        q->entries_len = 2U * num_in_flight;
+        q->entries = nvgpu_vzalloc(g,
+                        nvgpu_safe_mult_u64((u64)q->entries_len,
+                                sizeof(*q->entries)));
+        if (q->entries == NULL) {
+                err = -ENOMEM;
+                goto err_free_queue;
+        }
 
         err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem);
         if (err != 0) {
                 nvgpu_err(g, "%s: memory allocation failed", __func__);
-                goto err_free_buf;
+                goto err_free_entries;
         }
 
         tmp_size = q->mem.size / sizeof(u32);
@@ -119,53 +147,62 @@ int nvgpu_alloc_priv_cmdbuf_queue(struct nvgpu_channel *ch,
         ch->priv_cmd_q = q;
 
         return 0;
 
-err_free_buf:
+err_free_entries:
+        nvgpu_vfree(g, q->entries);
+err_free_queue:
         nvgpu_kfree(g, q);
         return err;
 }
 
-void nvgpu_free_priv_cmdbuf_queue(struct nvgpu_channel *ch)
+void nvgpu_priv_cmdbuf_queue_free(struct nvgpu_channel *ch)
 {
         struct vm_gk20a *ch_vm = ch->vm;
         struct priv_cmd_queue *q = ch->priv_cmd_q;
+        struct gk20a *g = ch->g;
 
         if (q == NULL) {
                 return;
         }
 
         nvgpu_dma_unmap_free(ch_vm, &q->mem);
-        nvgpu_kfree(ch->g, q);
+        nvgpu_vfree(g, q->entries);
+        nvgpu_kfree(g, q);
         ch->priv_cmd_q = NULL;
 }
 
 /* allocate a cmd buffer with given size. size is number of u32 entries */
-int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
+static int nvgpu_priv_cmdbuf_alloc_buf(struct nvgpu_channel *c, u32 orig_size,
         struct priv_cmd_entry *e)
 {
         struct priv_cmd_queue *q = c->priv_cmd_q;
-        u32 free_count;
         u32 size = orig_size;
+        u32 free_count;
 
         nvgpu_log_fn(c->g, "size %d", orig_size);
 
-        if (e == NULL) {
-                nvgpu_err(c->g,
-                        "ch %d: priv cmd entry is null",
-                        c->chid);
-                return -EINVAL;
-        }
-
-        /* if free space in the end is less than requested, increase the size
-         * to make the real allocated space start from beginning. */
-        if (q->put + size > q->size) {
+        /*
+         * If free space in the end is less than requested, increase the size
+         * to make the real allocated space start from beginning. The hardware
+         * expects each cmdbuf to be contiguous in the dma space.
+         *
+         * This too small extra space in the end may happen because the
+         * requested wait and incr command buffers do not necessarily align
+         * with the whole buffer capacity. They don't always align because the
+         * buffer size is rounded to the next power of two and because not all
+         * jobs necessarily use exactly one wait command.
+         */
+        if (nvgpu_safe_add_u32(q->put, size) > q->size) {
                 size = orig_size + (q->size - q->put);
         }
 
         nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d",
                         c->chid, q->get, q->put);
 
-        free_count = (q->size - (q->put - q->get) - 1U) % q->size;
+        nvgpu_assert(q->put < q->size);
+        nvgpu_assert(q->get < q->size);
+        nvgpu_assert(q->size > 0U);
+        free_count = (q->size - q->put + q->get - 1U) & (q->size - 1U);
 
         if (size > free_count) {
                 return -EAGAIN;
@@ -173,17 +210,22 @@ int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
         e->fill_off = 0;
         e->size = orig_size;
+        e->alloc_size = size;
         e->mem = &q->mem;
 
-        /* if we have increased size to skip free space in the end, set put
-           to beginning of cmd buffer (0) + size */
+        /*
+         * if we have increased size to skip free space in the end, set put
+         * to beginning of cmd buffer + size, as if the prev put was at
+         * position 0.
+         */
         if (size != orig_size) {
                 e->off = 0;
                 e->gva = q->mem.gpu_va;
                 q->put = orig_size;
         } else {
                 e->off = q->put;
-                e->gva = q->mem.gpu_va + q->put * sizeof(u32);
+                e->gva = nvgpu_safe_add_u64(q->mem.gpu_va,
+                                nvgpu_safe_mult_u64((u64)q->put, sizeof(u32)));
                 q->put = (q->put + orig_size) & (q->size - 1U);
         }
 
@@ -193,7 +235,7 @@ int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
         /*
          * commit the previous writes before making the entry valid.
          * see the corresponding nvgpu_smp_rmb() in
-         * nvgpu_channel_update_priv_cmd_q_and_free_entry().
+         * nvgpu_priv_cmdbuf_free().
          */
         nvgpu_smp_wmb();
 
@@ -203,30 +245,54 @@ int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
         return 0;
 }
 
-/*
- * Don't call this to free an explicit cmd entry.
- * It doesn't update priv_cmd_queue get/put.
- */
-void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c,
-                struct priv_cmd_entry *e)
+int nvgpu_priv_cmdbuf_alloc(struct nvgpu_channel *c, u32 size,
+                struct priv_cmd_entry **e)
 {
-        if (nvgpu_channel_is_prealloc_enabled(c)) {
-                (void) memset(e, 0, sizeof(struct priv_cmd_entry));
-        } else {
-                nvgpu_kfree(c->g, e);
+        struct priv_cmd_queue *q = c->priv_cmd_q;
+        u32 next_put = nvgpu_safe_add_u32(q->entry_put, 1U) % q->entries_len;
+        struct priv_cmd_entry *entry;
+        int err;
+
+        if (next_put == q->entry_get) {
+                return -EAGAIN;
         }
+        entry = &q->entries[q->entry_put];
+
+        err = nvgpu_priv_cmdbuf_alloc_buf(c, size, entry);
+        if (err != 0) {
+                return err;
+        }
+
+        q->entry_put = next_put;
+        *e = entry;
+
+        return 0;
 }
 
-void nvgpu_channel_update_priv_cmd_q_and_free_entry(
-                struct nvgpu_channel *ch, struct priv_cmd_entry *e)
+void nvgpu_priv_cmdbuf_rollback(struct nvgpu_channel *ch,
+                struct priv_cmd_entry *e)
+{
+        struct priv_cmd_queue *q = ch->priv_cmd_q;
+
+        nvgpu_assert(q->put < q->size);
+        nvgpu_assert(q->size > 0U);
+        nvgpu_assert(e->alloc_size <= q->size);
+        q->put = (q->put + q->size - e->alloc_size) & (q->size - 1U);
+
+        (void)memset(e, 0, sizeof(*e));
+
+        nvgpu_assert(q->entry_put < q->entries_len);
+        nvgpu_assert(q->entries_len > 0U);
+        q->entry_put = (q->entry_put + q->entries_len - 1U)
+                        % q->entries_len;
+}
+
+void nvgpu_priv_cmdbuf_free(struct nvgpu_channel *ch,
+                struct priv_cmd_entry *e)
 {
         struct priv_cmd_queue *q = ch->priv_cmd_q;
         struct gk20a *g = ch->g;
 
-        if (e == NULL) {
-                return;
-        }
-
         if (e->valid) {
                 /* read the entry's valid flag before reading its contents */
                 nvgpu_smp_rmb();
@@ -234,10 +300,13 @@ void nvgpu_channel_update_priv_cmd_q_and_free_entry(
                         nvgpu_err(g, "requests out-of-order, ch=%d",
                                 ch->chid);
                 }
-                q->get = e->off + e->size;
+                nvgpu_assert(q->size > 0U);
+                q->get = nvgpu_safe_add_u32(e->off, e->size) & (q->size - 1U);
+                q->entry_get = nvgpu_safe_add_u32(q->entry_get, 1U)
+                                % q->entries_len;
         }
 
-        nvgpu_channel_free_priv_cmd_entry(ch, e);
+        (void)memset(e, 0, sizeof(*e));
 }
 
 void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e,
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 9d86adee6..70dfb7366 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -62,7 +62,6 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
         int wait_fence_fd = -1;
         int err = 0;
         bool need_wfi = (flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI) == 0U;
-        bool pre_alloc_enabled = nvgpu_channel_is_prealloc_enabled(c);
         struct nvgpu_channel_sync_syncpt *sync_syncpt = NULL;
         bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
         bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
@@ -74,7 +73,7 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
                         c->sync = nvgpu_channel_sync_create(c);
                         if (c->sync == NULL) {
                                 err = -ENOMEM;
-                                goto fail;
+                                goto clean_up_unlock;
                         }
                         new_sync_created = true;
                 }
@@ -84,7 +83,7 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
         if ((g->ops.channel.set_syncpt != NULL) && new_sync_created) {
                 err = g->ops.channel.set_syncpt(c);
                 if (err != 0) {
-                        goto fail;
+                        goto clean_up_unlock;
                 }
         }
 
@@ -96,40 +95,27 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
                 u32 max_wait_cmds = nvgpu_channel_is_deterministic(c) ?
                         1U : 0U;
 
-                if (!pre_alloc_enabled) {
-                        job->wait_cmd = nvgpu_kzalloc(g,
-                                sizeof(struct priv_cmd_entry));
-                }
-
-                if (job->wait_cmd == NULL) {
-                        err = -ENOMEM;
-                        goto fail;
-                }
-
                 if (flag_sync_fence) {
                         nvgpu_assert(fence->id <= (u32)INT_MAX);
                         wait_fence_fd = (int)fence->id;
                         err = nvgpu_channel_sync_wait_fence_fd(c->sync,
-                                wait_fence_fd, job->wait_cmd, max_wait_cmds);
+                                wait_fence_fd, &job->wait_cmd, max_wait_cmds);
                 } else {
                         sync_syncpt = nvgpu_channel_sync_to_syncpt(c->sync);
                         if (sync_syncpt != NULL) {
                                 err = nvgpu_channel_sync_wait_syncpt(
                                         sync_syncpt, fence->id,
-                                        fence->value, job->wait_cmd);
+                                        fence->value, &job->wait_cmd);
                         } else {
                                 err = -EINVAL;
                         }
                 }
 
                 if (err != 0) {
-                        goto clean_up_wait_cmd;
+                        goto clean_up_unlock;
                 }
 
-                if (job->wait_cmd->valid) {
-                        /* not expired yet */
-                        *wait_cmd = job->wait_cmd;
-                }
+                *wait_cmd = job->wait_cmd;
         }
 
         if (flag_fence_get && flag_sync_fence) {
@@ -146,29 +132,21 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
                 err = -ENOMEM;
                 goto clean_up_wait_cmd;
         }
-        if (!pre_alloc_enabled) {
-                job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry));
-        }
-
-        if (job->incr_cmd == NULL) {
-                err = -ENOMEM;
-                goto clean_up_post_fence;
-        }
 
         if (flag_fence_get) {
                 err = nvgpu_channel_sync_incr_user(c->sync,
-                        job->incr_cmd, job->post_fence, need_wfi,
+                        &job->incr_cmd, job->post_fence, need_wfi,
                         need_sync_fence, register_irq);
         } else {
                 err = nvgpu_channel_sync_incr(c->sync,
-                        job->incr_cmd, job->post_fence, need_sync_fence,
+                        &job->incr_cmd, job->post_fence, need_sync_fence,
                         register_irq);
         }
         if (err == 0) {
                 *incr_cmd = job->incr_cmd;
                 *post_fence = job->post_fence;
         } else {
-                goto clean_up_incr_cmd;
+                goto clean_up_post_fence;
         }
 
         if (g->aggressive_sync_destroy_thresh != 0U) {
@@ -176,22 +154,15 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
         }
         return 0;
 
-clean_up_incr_cmd:
-        nvgpu_channel_free_priv_cmd_entry(c, job->incr_cmd);
-        if (!pre_alloc_enabled) {
-                job->incr_cmd = NULL;
-        }
 clean_up_post_fence:
         nvgpu_fence_put(job->post_fence);
         job->post_fence = NULL;
 clean_up_wait_cmd:
         if (job->wait_cmd != NULL) {
-                nvgpu_channel_free_priv_cmd_entry(c, job->wait_cmd);
+                nvgpu_priv_cmdbuf_rollback(c, job->wait_cmd);
         }
-        if (!pre_alloc_enabled) {
-                job->wait_cmd = NULL;
-        }
-fail:
+        job->wait_cmd = NULL;
+clean_up_unlock:
         if (g->aggressive_sync_destroy_thresh != 0U) {
                 nvgpu_mutex_release(&c->sync_lock);
         }
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync.c b/drivers/gpu/nvgpu/common/sync/channel_sync.c
index c773f7469..0d49052d1 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync.c
@@ -58,21 +58,21 @@ bool nvgpu_channel_sync_needs_os_fence_framework(struct gk20a *g)
 }
 
 int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd,
-        struct priv_cmd_entry *entry, u32 max_wait_cmds)
+        struct priv_cmd_entry **entry, u32 max_wait_cmds)
 {
         return s->ops->wait_fence_fd(s, fd, entry, max_wait_cmds);
 }
 
 int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s,
-        struct priv_cmd_entry *entry, struct nvgpu_fence_type *fence,
+        struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence,
         bool need_sync_fence, bool register_irq)
 {
         return s->ops->incr(s, entry, fence, need_sync_fence, register_irq);
 }
 
 int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s,
-        struct priv_cmd_entry *entry, struct nvgpu_fence_type *fence, bool wfi,
-        bool need_sync_fence, bool register_irq)
+        struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence,
+        bool wfi, bool need_sync_fence, bool register_irq)
 {
         return s->ops->incr_user(s, entry, fence, wfi, need_sync_fence,
                         register_irq);
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h
index 69f8bbcfc..77966bcdb 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h
@@ -54,19 +54,19 @@ struct nvgpu_channel_sync {
  */
 struct nvgpu_channel_sync_ops {
         int (*wait_fence_raw)(struct nvgpu_channel_sync *s, u32 id, u32 thresh,
-                                struct priv_cmd_entry *entry);
+                                struct priv_cmd_entry **entry);
         int (*wait_fence_fd)(struct nvgpu_channel_sync *s, int fd,
-                                struct priv_cmd_entry *entry, u32 max_wait_cmds);
+                                struct priv_cmd_entry **entry, u32 max_wait_cmds);
         int (*incr)(struct nvgpu_channel_sync *s,
-                                struct priv_cmd_entry *entry,
+                                struct priv_cmd_entry **entry,
                                 struct nvgpu_fence_type *fence,
                                 bool need_sync_fence,
                                 bool register_irq);
         int (*incr_user)(struct nvgpu_channel_sync *s,
-                                struct priv_cmd_entry *entry,
+                                struct priv_cmd_entry **entry,
                                 struct nvgpu_fence_type *fence,
                                 bool wfi,
                                 bool need_sync_fence,
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
index ad8e6df0a..0a8eeee62 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
@@ -114,7 +114,7 @@ static void channel_sync_semaphore_gen_wait_cmd(struct nvgpu_channel *c,
 
 static int channel_sync_semaphore_wait_fd(
                 struct nvgpu_channel_sync *s, int fd,
-                struct priv_cmd_entry *entry, u32 max_wait_cmds)
+                struct priv_cmd_entry **entry, u32 max_wait_cmds)
 {
         struct nvgpu_channel_sync_semaphore *sema =
                 nvgpu_channel_sync_semaphore_from_base(s);
@@ -148,17 +148,16 @@ static int channel_sync_semaphore_wait_fd(
         }
 
         wait_cmd_size = c->g->ops.sync.sema.get_wait_cmd_size();
-        err = nvgpu_channel_alloc_priv_cmdbuf(c,
+        err = nvgpu_priv_cmdbuf_alloc(c,
                 wait_cmd_size * num_fences, entry);
         if (err != 0) {
-                nvgpu_err(c->g, "not enough priv cmd buffer space");
                 goto cleanup;
         }
 
         for (i = 0; i < num_fences; i++) {
                 nvgpu_os_fence_sema_extract_nth_semaphore(
                         &os_fence_sema, i, &semaphore);
-                channel_sync_semaphore_gen_wait_cmd(c, semaphore, entry,
+                channel_sync_semaphore_gen_wait_cmd(c, semaphore, *entry,
                         wait_cmd_size);
         }
 
@@ -169,7 +168,7 @@ cleanup:
 
 static int channel_sync_semaphore_incr_common(
         struct nvgpu_channel_sync *s, bool wfi_cmd,
-        struct priv_cmd_entry *incr_cmd,
+        struct priv_cmd_entry **incr_cmd,
         struct nvgpu_fence_type *fence,
         bool need_sync_fence)
 {
@@ -189,39 +188,37 @@ static int channel_sync_semaphore_incr_common(
         }
 
         incr_cmd_size = c->g->ops.sync.sema.get_incr_cmd_size();
-        err = nvgpu_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
+        err = nvgpu_priv_cmdbuf_alloc(c, incr_cmd_size, incr_cmd);
         if (err != 0) {
-                nvgpu_err(c->g,
-                        "not enough priv cmd buffer space");
                 goto clean_up_sema;
         }
 
         /* Release the completion semaphore. */
-        add_sema_incr_cmd(c->g, c, semaphore, incr_cmd, wfi_cmd);
+        add_sema_incr_cmd(c->g, c, semaphore, *incr_cmd, wfi_cmd);
 
         if (need_sync_fence) {
-                err = nvgpu_os_fence_sema_create(&os_fence, c,
-                        semaphore);
+                err = nvgpu_os_fence_sema_create(&os_fence, c, semaphore);
                 if (err != 0) {
-                        goto clean_up_sema;
+                        goto clean_up_cmdbuf;
                 }
         }
 
-        err = nvgpu_fence_from_semaphore(fence,
-                semaphore,
-                &c->semaphore_wq,
-                os_fence);
+        err = nvgpu_fence_from_semaphore(fence, semaphore, &c->semaphore_wq,
+                os_fence);
         if (err != 0) {
-                if (nvgpu_os_fence_is_initialized(&os_fence)) {
-                        os_fence.ops->drop_ref(&os_fence);
-                }
-                goto clean_up_sema;
+                goto clean_up_os_fence;
         }
 
         return 0;
 
+clean_up_os_fence:
+        if (nvgpu_os_fence_is_initialized(&os_fence)) {
+                os_fence.ops->drop_ref(&os_fence);
+        }
+clean_up_cmdbuf:
+        nvgpu_priv_cmdbuf_rollback(c, *incr_cmd);
 clean_up_sema:
         nvgpu_semaphore_put(semaphore);
         return err;
@@ -229,7 +226,7 @@ clean_up_sema:
 
 static int channel_sync_semaphore_incr(
         struct nvgpu_channel_sync *s,
-        struct priv_cmd_entry *entry,
+        struct priv_cmd_entry **entry,
         struct nvgpu_fence_type *fence,
         bool need_sync_fence,
         bool register_irq)
@@ -243,7 +240,7 @@ static int channel_sync_semaphore_incr(
 
 static int channel_sync_semaphore_incr_user(
         struct nvgpu_channel_sync *s,
-        struct priv_cmd_entry *entry,
+        struct priv_cmd_entry **entry,
         struct nvgpu_fence_type *fence,
         bool wfi,
         bool need_sync_fence,
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
index 87e8bb702..4a9f42c43 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
@@ -67,7 +67,7 @@ static void channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c,
 }
 
 static int channel_sync_syncpt_wait_raw(struct nvgpu_channel_sync_syncpt *s,
-        u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd)
+        u32 id, u32 thresh, struct priv_cmd_entry **wait_cmd)
 {
         struct nvgpu_channel *c = s->c;
         int err = 0;
@@ -77,22 +77,21 @@ static int channel_sync_syncpt_wait_raw(struct nvgpu_channel_sync_syncpt *s,
                 return -EINVAL;
         }
 
-        err = nvgpu_channel_alloc_priv_cmdbuf(c,
+        err = nvgpu_priv_cmdbuf_alloc(c,
                 c->g->ops.sync.syncpt.get_wait_cmd_size(), wait_cmd);
         if (err != 0) {
-                nvgpu_err(c->g, "not enough priv cmd buffer space");
                 return err;
         }
 
         channel_sync_syncpt_gen_wait_cmd(c, id, thresh,
-                wait_cmd, wait_cmd_size);
+                *wait_cmd, wait_cmd_size);
 
         return 0;
 }
 
 static int channel_sync_syncpt_wait_fd(struct nvgpu_channel_sync *s, int fd,
-        struct priv_cmd_entry *wait_cmd, u32 max_wait_cmds)
+        struct priv_cmd_entry **wait_cmd, u32 max_wait_cmds)
 {
         struct nvgpu_os_fence os_fence = {0};
         struct nvgpu_os_fence_syncpt os_fence_syncpt = {0};
@@ -136,11 +135,9 @@ static int channel_sync_syncpt_wait_fd(struct nvgpu_channel_sync *s, int fd,
         }
 
         wait_cmd_size = c->g->ops.sync.syncpt.get_wait_cmd_size();
-        err = nvgpu_channel_alloc_priv_cmdbuf(c,
+        err = nvgpu_priv_cmdbuf_alloc(c,
                 wait_cmd_size * num_fences, wait_cmd);
         if (err != 0) {
-                nvgpu_err(c->g, "not enough priv cmd buffer space");
-                err = -EINVAL;
                 goto cleanup;
         }
 
@@ -148,7 +145,7 @@ static int channel_sync_syncpt_wait_fd(struct nvgpu_channel_sync *s, int fd,
                 nvgpu_os_fence_syncpt_extract_nth_syncpt(
                         &os_fence_syncpt, i, &syncpt_id, &syncpt_thresh);
                 channel_sync_syncpt_gen_wait_cmd(c, syncpt_id,
-                        syncpt_thresh, wait_cmd, wait_cmd_size);
+                        syncpt_thresh, *wait_cmd, wait_cmd_size);
         }
 
 cleanup:
@@ -169,7 +166,7 @@ static void channel_sync_syncpt_update(void *priv, int nr_completed)
 
 static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
         bool wfi_cmd, bool register_irq,
-        struct priv_cmd_entry *incr_cmd,
+        struct priv_cmd_entry **incr_cmd,
         struct nvgpu_fence_type *fence,
         bool need_sync_fence)
 {
@@ -180,7 +177,7 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
         struct nvgpu_channel *c = sp->c;
         struct nvgpu_os_fence os_fence = {0};
 
-        err = nvgpu_channel_alloc_priv_cmdbuf(c,
+        err = nvgpu_priv_cmdbuf_alloc(c,
                 c->g->ops.sync.syncpt.get_incr_cmd_size(wfi_cmd), incr_cmd);
         if (err != 0) {
@@ -189,7 +186,7 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
         nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
                         sp->id, sp->syncpt_buf.gpu_va);
 
-        c->g->ops.sync.syncpt.add_incr_cmd(c->g, incr_cmd,
+        c->g->ops.sync.syncpt.add_incr_cmd(c->g, *incr_cmd,
                 sp->id, sp->syncpt_buf.gpu_va, wfi_cmd);
 
         thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id,
@@ -244,12 +241,12 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
         return 0;
 
 clean_up_priv_cmd:
-        nvgpu_channel_update_priv_cmd_q_and_free_entry(c, incr_cmd);
+        nvgpu_priv_cmdbuf_rollback(c, *incr_cmd);
         return err;
 }
 
 static int channel_sync_syncpt_incr(struct nvgpu_channel_sync *s,
-        struct priv_cmd_entry *entry,
+        struct priv_cmd_entry **entry,
         struct nvgpu_fence_type *fence,
         bool need_sync_fence,
         bool register_irq)
@@ -263,7 +260,7 @@ static int channel_sync_syncpt_incr(struct nvgpu_channel_sync *s,
 }
 
 static int channel_sync_syncpt_incr_user(struct nvgpu_channel_sync *s,
-        struct priv_cmd_entry *entry,
+        struct priv_cmd_entry **entry,
         struct nvgpu_fence_type *fence,
         bool wfi,
         bool need_sync_fence,
@@ -278,7 +275,7 @@ static int channel_sync_syncpt_incr_user(struct nvgpu_channel_sync *s,
 }
 
 int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s,
-        u32 id, u32 thresh, struct priv_cmd_entry *entry)
+        u32 id, u32 thresh, struct priv_cmd_entry **entry)
 {
         return channel_sync_syncpt_wait_raw(s, id, thresh, entry);
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
index 2be1e780d..e19d36e39 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
@@ -43,7 +43,7 @@ struct gk20a;
  * Returns a gpu cmdbuf that performs the wait when executed
  */
 int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd,
-        struct priv_cmd_entry *entry, u32 max_wait_cmds);
+        struct priv_cmd_entry **entry, u32 max_wait_cmds);
 
 /*
  * Increment syncpoint/semaphore.
@@ -52,7 +52,7 @@ int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd,
  *  - a fence that can be passed to wait_cpu() and is_expired().
  */
 int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s,
-        struct priv_cmd_entry *entry, struct nvgpu_fence_type *fence,
+        struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence,
         bool need_sync_fence, bool register_irq);
 
 /*
@@ -64,8 +64,8 @@ int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s,
  *  - a nvgpu_fence_type that signals when the incr has happened.
  */
 int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s,
-        struct priv_cmd_entry *entry, struct nvgpu_fence_type *fence, bool wfi,
-        bool need_sync_fence, bool register_irq);
+        struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence,
+        bool wfi, bool need_sync_fence, bool register_irq);
 
 /*
  * Reset the channel syncpoint/semaphore. Syncpoint increments generally
  * wrap around the range of integer values. Current max value encompasses
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync_syncpt.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync_syncpt.h
index 9d3bd904c..b24809072 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync_syncpt.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync_syncpt.h
@@ -53,7 +53,7 @@ u32 nvgpu_channel_sync_get_syncpt_id(struct nvgpu_channel_sync_syncpt *s);
  * Returns a gpu cmdbuf that performs the wait when executed.
  */
 int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s,
-        u32 id, u32 thresh, struct priv_cmd_entry *entry);
+        u32 id, u32 thresh, struct priv_cmd_entry **entry);
 #endif
 
 /**
@@ -97,7 +97,7 @@ static inline u64 nvgpu_channel_sync_get_syncpt_address(
 
 static inline int nvgpu_channel_sync_wait_syncpt(
         struct nvgpu_channel_sync_syncpt *s,
-        u32 id, u32 thresh, struct priv_cmd_entry *entry)
+        u32 id, u32 thresh, struct priv_cmd_entry **entry)
 {
         return -EINVAL;
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h b/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h
index f06954dac..9e26020ee 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h
@@ -35,18 +35,18 @@ struct priv_cmd_entry {
         u32 off;        /* offset in mem, in u32 entries */
         u32 fill_off;   /* write offset from off, in u32 entries */
         u64 gva;
-        u32 get;        /* start of entry in queue */
         u32 size;       /* in words */
+        u32 alloc_size;
 };
 
-int nvgpu_alloc_priv_cmdbuf_queue(struct nvgpu_channel *ch, u32 num_in_flight);
-void nvgpu_free_priv_cmdbuf_queue(struct nvgpu_channel *ch);
+int nvgpu_priv_cmdbuf_queue_alloc(struct nvgpu_channel *ch, u32 num_in_flight);
+void nvgpu_priv_cmdbuf_queue_free(struct nvgpu_channel *ch);
 
-int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
+int nvgpu_priv_cmdbuf_alloc(struct nvgpu_channel *c, u32 size,
+        struct priv_cmd_entry **e);
+void nvgpu_priv_cmdbuf_rollback(struct nvgpu_channel *ch,
         struct priv_cmd_entry *e);
-void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c,
-        struct priv_cmd_entry *e);
-void nvgpu_channel_update_priv_cmd_q_and_free_entry(struct nvgpu_channel *ch,
+void nvgpu_priv_cmdbuf_free(struct nvgpu_channel *ch,
         struct priv_cmd_entry *e);
 
 void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e,
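A few standalone sketches of the queue arithmetic this patch introduces follow; they are illustrative models only, not part of the diff and not driver code. The reworked free_count relies on q->size being a power of two (guaranteed by the roundup_pow_of_two() sizing above) and on put/get always staying below q->size. A minimal model of that computation, with made-up values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Model of the pushbuf free-space computation in nvgpu_priv_cmdbuf_alloc_buf():
 * size is a power of two, put/get are word offsets kept below size, and one
 * word is always left unused so that put == get unambiguously means "empty".
 */
static uint32_t free_words(uint32_t size, uint32_t put, uint32_t get)
{
	assert(size != 0U && (size & (size - 1U)) == 0U);
	assert(put < size && get < size);

	/* same expression as the new free_count in priv_cmdbuf.c */
	return (size - put + get - 1U) & (size - 1U);
}

int main(void)
{
	printf("%u\n", free_words(1024U, 0U, 0U));   /* empty queue: 1023 free */
	printf("%u\n", free_words(1024U, 8U, 512U)); /* put wrapped past the end: 503 free */
	return 0;
}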
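struct priv_cmd_queue also gains a second ring (entries, entry_put, entry_get) that hands out priv_cmd_entry descriptors instead of kzalloc-ing one per submit. The sketch below is a simplified model of that ring with hypothetical names, showing the index moves used by nvgpu_priv_cmdbuf_alloc(), nvgpu_priv_cmdbuf_rollback() and nvgpu_priv_cmdbuf_free():

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* illustrative stand-in for the entry bookkeeping in struct priv_cmd_queue */
struct entry_ring {
	uint32_t len;	/* q->entries_len, 2 * num_in_flight in the patch */
	uint32_t put;	/* q->entry_put */
	uint32_t get;	/* q->entry_get */
};

/* hand out the slot at put; fails (like -EAGAIN) when only one slot is left */
bool ring_alloc(struct entry_ring *r, uint32_t *slot)
{
	uint32_t next_put = (r->put + 1U) % r->len;

	if (next_put == r->get) {
		return false;
	}
	*slot = r->put;
	r->put = next_put;
	return true;
}

/* undo the most recent ring_alloc(), cf. nvgpu_priv_cmdbuf_rollback() */
void ring_rollback(struct entry_ring *r)
{
	r->put = (r->put + r->len - 1U) % r->len;
}

/* retire the oldest slot, cf. the entry_get advance in nvgpu_priv_cmdbuf_free() */
void ring_free_oldest(struct entry_ring *r)
{
	assert(r->get != r->put);
	r->get = (r->get + 1U) % r->len;
}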
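For reference, a sketch of how the pushbuf ends up sized in nvgpu_priv_cmdbuf_queue_alloc(). The 8- and 10-word wait/incr figures are taken from the comment quoted in the patch; the real values come from g->ops.sync.*.get_*_cmd_size(), and the gpfifo/3 fallback presumably reflects each job needing at least a wait entry, a user entry and an incr entry in the gpfifo. Illustrative only:

#include <stdint.h>
#include <stdio.h>

#define MODEL_PAGE_SIZE 4096ULL

/* stand-in for roundup_pow_of_two(): smallest power of two >= x, for x > 0 */
static uint64_t pow2_roundup(uint64_t x)
{
	uint64_t p = 1ULL;

	while (p < x) {
		p <<= 1;
	}
	return p;
}

/* sketch of the sizing done before nvgpu_dma_alloc_map_sys() */
static uint64_t priv_cmdbuf_bytes(uint32_t num_in_flight, uint32_t gpfifo_entries)
{
	const uint32_t wait_size = 8U;   /* semaphore ACQ words, per the comment */
	const uint32_t incr_size = 10U;  /* semaphore INCR words, per the comment */
	uint64_t size;

	if (num_in_flight == 0U) {
		num_in_flight = gpfifo_entries / 3U;
	}
	size = (uint64_t)num_in_flight * (wait_size + incr_size) * sizeof(uint32_t);
	/* a power-of-two total keeps the put/get wrap a cheap mask */
	size = pow2_roundup(size);
	return (size + MODEL_PAGE_SIZE - 1ULL) & ~(MODEL_PAGE_SIZE - 1ULL);
}

int main(void)
{
	printf("%llu\n", (unsigned long long)priv_cmdbuf_bytes(0U, 1024U));  /* 32768 */
	printf("%llu\n", (unsigned long long)priv_cmdbuf_bytes(128U, 0U));   /* 16384 */
	return 0;
}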
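Finally, the new alloc_size field records how many words an allocation actually consumed, including any unusable tail that was skipped so the command buffer stays contiguous for the hardware; that full amount is what nvgpu_priv_cmdbuf_rollback() gives back. A simplified model of that put-side bookkeeping, with made-up names and no driver types:

#include <stdint.h>

struct alloc_result {
	uint32_t off;        /* like e->off: first word of the entry */
	uint32_t alloc_size; /* like e->alloc_size: charged words incl. skipped tail */
};

/*
 * Mirror of the put-side logic in nvgpu_priv_cmdbuf_alloc_buf(): qsize is a
 * power of two, want is the requested word count, put/get as in the queue.
 * Returns 0 on success, -1 when the caller should retry later (-EAGAIN).
 */
int alloc_words(uint32_t qsize, uint32_t *put, uint32_t get,
		uint32_t want, struct alloc_result *res)
{
	uint32_t size = want;
	uint32_t free_count;

	if (*put + size > qsize) {
		/* too little room at the end: charge the tail and wrap to 0 */
		size = want + (qsize - *put);
	}

	free_count = (qsize - *put + get - 1U) & (qsize - 1U);
	if (size > free_count) {
		return -1;
	}

	res->alloc_size = size;
	if (size != want) {
		res->off = 0U;
		*put = want;	/* as if the previous put had been at 0 */
	} else {
		res->off = *put;
		*put = (*put + want) & (qsize - 1U);
	}
	return 0;
}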