diff --git a/drivers/gpu/nvgpu/common/fence/fence.c b/drivers/gpu/nvgpu/common/fence/fence.c
index 0c437f54b..73629dc3c 100644
--- a/drivers/gpu/nvgpu/common/fence/fence.c
+++ b/drivers/gpu/nvgpu/common/fence/fence.c
@@ -41,24 +41,21 @@ static struct nvgpu_fence_type *nvgpu_fence_from_ref(struct nvgpu_ref *ref)
 static void nvgpu_fence_free(struct nvgpu_ref *ref)
 {
 	struct nvgpu_fence_type *f = nvgpu_fence_from_ref(ref);
-	struct gk20a *g = f->g;
 
 	if (nvgpu_os_fence_is_initialized(&f->os_fence)) {
 		f->os_fence.ops->drop_ref(&f->os_fence);
 	}
+
 #ifdef CONFIG_NVGPU_SW_SEMAPHORE
 	if (f->semaphore != NULL) {
 		nvgpu_semaphore_put(f->semaphore);
 	}
 #endif
-	if (f->allocator != NULL) {
-		if (nvgpu_alloc_initialized(f->allocator)) {
-			nvgpu_free(f->allocator, (u64)(uintptr_t)f);
-		}
-	} else {
-		nvgpu_kfree(g, f);
-	}
+	/* the allocator must outlive the fences */
+	BUG_ON(!nvgpu_alloc_initialized(f->allocator));
+
+	nvgpu_free(f->allocator, (u64)(uintptr_t)f);
 }
 
 void nvgpu_fence_put(struct nvgpu_fence_type *f)
@@ -146,7 +143,8 @@ void nvgpu_fence_pool_free(struct nvgpu_channel *ch)
 {
 	if (nvgpu_alloc_initialized(&ch->fence_allocator)) {
 		struct nvgpu_fence_type *fence_pool;
-		fence_pool = (struct nvgpu_fence_type *)(uintptr_t)
+
+		fence_pool = (struct nvgpu_fence_type *)(uintptr_t)
 			nvgpu_alloc_base(&ch->fence_allocator);
 		nvgpu_alloc_destroy(&ch->fence_allocator);
 		nvgpu_vfree(ch->g, fence_pool);
@@ -158,23 +156,16 @@ struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch)
 {
 	struct nvgpu_fence_type *fence = NULL;
 
-	if (nvgpu_channel_is_prealloc_enabled(ch)) {
-		if (nvgpu_alloc_initialized(&ch->fence_allocator)) {
-			fence = (struct nvgpu_fence_type *)(uintptr_t)
-				nvgpu_alloc(&ch->fence_allocator,
-					sizeof(struct nvgpu_fence_type));
-
-			/* clear the node and reset the allocator pointer */
-			if (fence != NULL) {
-				(void) memset(fence, 0, sizeof(*fence));
-				fence->allocator = &ch->fence_allocator;
-			}
-		}
-	} else {
-		fence = nvgpu_kzalloc(ch->g, sizeof(struct nvgpu_fence_type));
+	if (nvgpu_alloc_initialized(&ch->fence_allocator)) {
+		fence = (struct nvgpu_fence_type *)(uintptr_t)
+			nvgpu_alloc(&ch->fence_allocator,
+				sizeof(struct nvgpu_fence_type));
 	}
 
 	if (fence != NULL) {
+		(void) memset(fence, 0, sizeof(*fence));
+		fence->allocator = &ch->fence_allocator;
+
 		nvgpu_ref_init(&fence->ref);
 		fence->g = ch->g;
 	}
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 50a32de01..e963c4fc4 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -224,10 +224,7 @@ static void channel_kernelmode_deinit(struct nvgpu_channel *ch)
 		ch->priv_cmd_q = NULL;
 	}
 
-	/* free pre-allocated resources, if applicable */
-	if (nvgpu_channel_is_prealloc_enabled(ch)) {
-		channel_free_prealloc_resources(ch);
-	}
+	channel_free_prealloc_resources(ch);
 
 	/* sync must be destroyed before releasing channel vm */
 	nvgpu_mutex_acquire(&ch->sync_lock);
@@ -238,18 +235,6 @@ static void channel_kernelmode_deinit(struct nvgpu_channel *ch)
 	nvgpu_mutex_release(&ch->sync_lock);
 }
 
-bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c)
-{
-#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
-	bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
-
-	nvgpu_smp_rmb();
-	return pre_alloc_enabled;
-#else
-	return false;
-#endif
-}
-
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
 int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch)
 {
@@ -311,7 +296,7 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
 {
 	u32 gpfifo_size, gpfifo_entry_size;
 	u64 gpfifo_gpu_va;
-	u32 priv_cmd_jobs;
+	u32 job_count;
 	int err = 0;
 	struct gk20a *g = c->g;
 
@@ -374,15 +359,6 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
 		goto clean_up_sync;
 	}
 
-	if (nvgpu_channel_is_deterministic(c) &&
-		args->num_inflight_jobs != 0U) {
-		err = channel_prealloc_resources(c,
-				args->num_inflight_jobs);
-		if (err != 0) {
-			goto clean_up_sync;
-		}
-	}
-
 	/*
 	 * Allocate priv cmdbuf space for pre and post fences. If the inflight
 	 * job count isn't specified, we base it on the gpfifo count. We
@@ -391,12 +367,25 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
 	 * wait entries, and the final third to incr entries. There will be one
 	 * pair of acq and incr commands for each job.
 	 */
-	priv_cmd_jobs = args->num_inflight_jobs;
-	if (priv_cmd_jobs == 0U) {
-		priv_cmd_jobs = c->gpfifo.entry_num / 3U;
+	job_count = args->num_inflight_jobs;
+	if (job_count == 0U) {
+		/*
+		 * Round up so the allocation behaves nicely with a very small
+		 * gpfifo, and to be able to use all slots when the entry count
+		 * would be one too small for both wait and incr commands. An
+		 * increment would then still just fit.
+		 *
+		 * gpfifo_size is required to be at most 2^31 earlier.
+		 */
+		job_count = nvgpu_safe_add_u32(gpfifo_size, 2U) / 3U;
 	}
 
-	err = nvgpu_priv_cmdbuf_queue_alloc(c->vm, priv_cmd_jobs, &c->priv_cmd_q);
+	err = channel_prealloc_resources(c, job_count);
+	if (err != 0) {
+		goto clean_up_sync;
+	}
+
+	err = nvgpu_priv_cmdbuf_queue_alloc(c->vm, job_count, &c->priv_cmd_q);
 	if (err != 0) {
 		goto clean_up_prealloc;
 	}
@@ -412,10 +401,7 @@ clean_up_priv_cmd:
 	nvgpu_priv_cmdbuf_queue_free(c->priv_cmd_q);
 	c->priv_cmd_q = NULL;
 clean_up_prealloc:
-	if (nvgpu_channel_is_deterministic(c) &&
-		args->num_inflight_jobs != 0U) {
-		channel_free_prealloc_resources(c);
-	}
+	channel_free_prealloc_resources(c);
 clean_up_sync:
 	if (c->sync != NULL) {
 		nvgpu_channel_sync_destroy(c->sync);
@@ -626,7 +612,6 @@ int nvgpu_channel_add_job(struct nvgpu_channel *c,
 	struct nvgpu_mapped_buf **mapped_buffers = NULL;
 	int err = 0;
 	u32 num_mapped_buffers = 0;
-	bool pre_alloc_enabled = nvgpu_channel_is_prealloc_enabled(c);
 
 	if (!skip_buffer_refcounting) {
 		err = nvgpu_vm_get_buffers(vm, &mapped_buffers,
@@ -642,21 +627,9 @@ int nvgpu_channel_add_job(struct nvgpu_channel *c,
 
 		nvgpu_channel_wdt_start(c->wdt, c);
 
-		if (!pre_alloc_enabled) {
-			nvgpu_channel_joblist_lock(c);
-		}
-
-		/*
-		 * ensure all pending write complete before adding to the list.
-		 * see corresponding nvgpu_smp_rmb in
-		 * nvgpu_channel_clean_up_jobs()
-		 */
-		nvgpu_smp_wmb();
+		nvgpu_channel_joblist_lock(c);
 		channel_joblist_add(c, job);
-
-		if (!pre_alloc_enabled) {
-			nvgpu_channel_joblist_unlock(c);
-		}
+		nvgpu_channel_joblist_unlock(c);
 	} else {
 		err = -ETIMEDOUT;
 		goto err_put_buffers;
@@ -719,13 +692,6 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
 			nvgpu_channel_joblist_unlock(c);
 			break;
 		}
-
-		/*
-		 * ensure that all subsequent reads occur after checking
-		 * that we have a valid node. see corresponding nvgpu_smp_wmb in
-		 * nvgpu_channel_add_job().
-		 */
-		nvgpu_smp_rmb();
 		job = channel_joblist_peek(c);
 		nvgpu_channel_joblist_unlock(c);
 
@@ -769,15 +735,6 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
 				job->num_mapped_buffers);
 		}
 
-		/*
-		 * Remove job from channel's job list before we close the
-		 * fences, to prevent other callers (nvgpu_channel_abort) from
-		 * trying to dereference post_fence when it no longer exists.
-		 */
-		nvgpu_channel_joblist_lock(c);
-		channel_joblist_delete(c, job);
-		nvgpu_channel_joblist_unlock(c);
-
 		/* Close the fence (this will unref the semaphore and release
 		 * it to the pool). */
 		nvgpu_fence_put(job->post_fence);
@@ -791,13 +748,12 @@
 		}
 		nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->incr_cmd);
 
-		/*
-		 * ensure all pending writes complete before freeing up the job.
-		 * see corresponding nvgpu_smp_rmb in nvgpu_channel_alloc_job().
-		 */
-		nvgpu_smp_wmb();
-
 		nvgpu_channel_free_job(c, job);
+
+		nvgpu_channel_joblist_lock(c);
+		channel_joblist_delete(c, job);
+		nvgpu_channel_joblist_unlock(c);
+
 		job_finished = true;
 
 		/*
@@ -1584,6 +1540,16 @@ static int channel_setup_bind_prechecks(struct nvgpu_channel *c,
 		goto fail;
 	}
 
+	/*
+	 * The gpfifo ring buffer is empty when get == put and it's full when
+	 * get == put + 1. Just one entry wouldn't make sense.
+	 */
+	if (args->num_gpfifo_entries < 2U) {
+		nvgpu_err(g, "gpfifo has no space for any jobs");
+		err = -EINVAL;
+		goto fail;
+	}
+
 	/* an address space needs to have been bound at this point. */
 	if (!nvgpu_channel_as_bound(c)) {
 		nvgpu_err(g,
@@ -1923,8 +1889,6 @@ int nvgpu_channel_init_support(struct gk20a *g, u32 chid)
 	nvgpu_spinlock_init(&c->ref_actions_lock);
 #endif
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
-	nvgpu_spinlock_init(&c->joblist.dynamic.lock);
-	nvgpu_init_list_node(&c->joblist.dynamic.jobs);
 	nvgpu_init_list_node(&c->worker_item);
 	nvgpu_mutex_init(&c->joblist.cleanup_lock);
 
diff --git a/drivers/gpu/nvgpu/common/fifo/job.c b/drivers/gpu/nvgpu/common/fifo/job.c
index 2035aa682..f74ff29e2 100644
--- a/drivers/gpu/nvgpu/common/fifo/job.c
+++ b/drivers/gpu/nvgpu/common/fifo/job.c
@@ -40,82 +40,44 @@ channel_gk20a_job_from_list(struct nvgpu_list_node *node)
 int nvgpu_channel_alloc_job(struct nvgpu_channel *c,
 		struct nvgpu_channel_job **job_out)
 {
-	int err = 0;
+	unsigned int put = c->joblist.pre_alloc.put;
+	unsigned int get = c->joblist.pre_alloc.get;
+	unsigned int next = (put + 1) % c->joblist.pre_alloc.length;
+	bool full = next == get;
 
-	if (nvgpu_channel_is_prealloc_enabled(c)) {
-		unsigned int put = c->joblist.pre_alloc.put;
-		unsigned int get = c->joblist.pre_alloc.get;
-		unsigned int next = (put + 1) % c->joblist.pre_alloc.length;
-		bool full = next == get;
-
-		/*
-		 * ensure all subsequent reads happen after reading get.
-		 * see corresponding nvgpu_smp_wmb in
-		 * nvgpu_channel_clean_up_jobs()
-		 */
-		nvgpu_smp_rmb();
-
-		if (!full) {
-			*job_out = &c->joblist.pre_alloc.jobs[put];
-		} else {
-			nvgpu_warn(c->g,
-					"out of job ringbuffer space");
-			err = -EAGAIN;
-		}
-	} else {
-		*job_out = nvgpu_kzalloc(c->g,
-				sizeof(struct nvgpu_channel_job));
-		if (*job_out == NULL) {
-			err = -ENOMEM;
-		}
+	if (full) {
+		return -EAGAIN;
 	}
 
-	return err;
+	*job_out = &c->joblist.pre_alloc.jobs[put];
+
+	return 0;
 }
 
 void nvgpu_channel_free_job(struct nvgpu_channel *c,
 		struct nvgpu_channel_job *job)
 {
-	if (nvgpu_channel_is_prealloc_enabled(c)) {
-		(void) memset(job, 0, sizeof(*job));
-	} else {
-		nvgpu_kfree(c->g, job);
-	}
+	(void) memset(job, 0, sizeof(*job));
 }
 
 void nvgpu_channel_joblist_lock(struct nvgpu_channel *c)
 {
-	if (nvgpu_channel_is_prealloc_enabled(c)) {
-		nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock);
-	} else {
-		nvgpu_spinlock_acquire(&c->joblist.dynamic.lock);
-	}
+	nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock);
 }
 
 void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c)
 {
-	if (nvgpu_channel_is_prealloc_enabled(c)) {
-		nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock);
-	} else {
-		nvgpu_spinlock_release(&c->joblist.dynamic.lock);
-	}
+	nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock);
 }
 
 struct nvgpu_channel_job *channel_joblist_peek(struct nvgpu_channel *c)
 {
-	u32 get;
 	struct nvgpu_channel_job *job = NULL;
 
-	if (nvgpu_channel_is_prealloc_enabled(c)) {
-		if (!nvgpu_channel_joblist_is_empty(c)) {
-			get = c->joblist.pre_alloc.get;
-			job = &c->joblist.pre_alloc.jobs[get];
-		}
-	} else {
-		if (!nvgpu_list_empty(&c->joblist.dynamic.jobs)) {
-			job = nvgpu_list_first_entry(&c->joblist.dynamic.jobs,
-				channel_gk20a_job, list);
-		}
+	if (!nvgpu_channel_joblist_is_empty(c)) {
+		unsigned int get = c->joblist.pre_alloc.get;
+
+		job = &c->joblist.pre_alloc.jobs[get];
 	}
 
 	return job;
@@ -124,48 +86,27 @@ struct nvgpu_channel_job *channel_joblist_peek(struct nvgpu_channel *c)
 void channel_joblist_add(struct nvgpu_channel *c,
 		struct nvgpu_channel_job *job)
 {
-	if (nvgpu_channel_is_prealloc_enabled(c)) {
-		c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1U) %
-			(c->joblist.pre_alloc.length);
-	} else {
-		nvgpu_list_add_tail(&job->list, &c->joblist.dynamic.jobs);
-	}
+	c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1U) %
+		(c->joblist.pre_alloc.length);
 }
 
 void channel_joblist_delete(struct nvgpu_channel *c,
 		struct nvgpu_channel_job *job)
 {
-	if (nvgpu_channel_is_prealloc_enabled(c)) {
-		c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1U) %
-			(c->joblist.pre_alloc.length);
-	} else {
-		nvgpu_list_del(&job->list);
-	}
+	c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1U) %
+		(c->joblist.pre_alloc.length);
 }
 
 bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c)
 {
-	if (nvgpu_channel_is_prealloc_enabled(c)) {
-
-		unsigned int get = c->joblist.pre_alloc.get;
-		unsigned int put = c->joblist.pre_alloc.put;
-
-		return get == put;
-	}
-
-	return nvgpu_list_empty(&c->joblist.dynamic.jobs);
+	return c->joblist.pre_alloc.get == c->joblist.pre_alloc.put;
}
 
-int channel_prealloc_resources(struct nvgpu_channel *ch, u32 num_jobs)
+int channel_prealloc_resources(struct nvgpu_channel *c, u32 num_jobs)
 {
-#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
 	int err;
 	u32 size;
 
-	if ((nvgpu_channel_is_prealloc_enabled(ch)) || (num_jobs == 0U)) {
-		return -EINVAL;
-	}
-
 	size = (u32)sizeof(struct nvgpu_channel_job);
 	if (num_jobs > nvgpu_safe_sub_u32(U32_MAX / size, 1U)) {
 		err = -ERANGE;
@@ -177,57 +118,41 @@ int channel_prealloc_resources(struct nvgpu_channel *ch, u32 num_jobs)
 	 * units of item slot), so allocate a size of (num_jobs + 1) * size
 	 * bytes.
 	 */
-	ch->joblist.pre_alloc.jobs = nvgpu_vzalloc(ch->g,
+	c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g,
 			nvgpu_safe_mult_u32(
 				nvgpu_safe_add_u32(num_jobs, 1U), size));
-	if (ch->joblist.pre_alloc.jobs == NULL) {
+	if (c->joblist.pre_alloc.jobs == NULL) {
 		err = -ENOMEM;
 		goto clean_up;
 	}
 
-	/* pre-allocate a fence pool */
-	err = nvgpu_fence_pool_alloc(ch, num_jobs);
+	err = nvgpu_fence_pool_alloc(c, num_jobs);
 	if (err != 0) {
 		goto clean_up;
 	}
 
-	ch->joblist.pre_alloc.length = num_jobs;
-	ch->joblist.pre_alloc.put = 0;
-	ch->joblist.pre_alloc.get = 0;
-
 	/*
-	 * commit the previous writes before setting the flag.
-	 * see corresponding nvgpu_smp_rmb in
-	 * nvgpu_channel_is_prealloc_enabled()
+	 * length is the allocation size of the ringbuffer; the number of jobs
+	 * that fit is one less.
 	 */
-	nvgpu_smp_wmb();
-	ch->joblist.pre_alloc.enabled = true;
+	c->joblist.pre_alloc.length = nvgpu_safe_add_u32(num_jobs, 1U);
+	c->joblist.pre_alloc.put = 0;
+	c->joblist.pre_alloc.get = 0;
 
 	return 0;
 
 clean_up:
-	nvgpu_vfree(ch->g, ch->joblist.pre_alloc.jobs);
-	(void) memset(&ch->joblist.pre_alloc, 0, sizeof(ch->joblist.pre_alloc));
+	nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
+	(void) memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
 	return err;
-#else
-	return -ENOSYS;
-#endif
 }
 
 void channel_free_prealloc_resources(struct nvgpu_channel *c)
 {
-#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
-	nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd);
-	nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
-	nvgpu_fence_pool_free(c);
-
-	/*
-	 * commit the previous writes before disabling the flag.
-	 * see corresponding nvgpu_smp_rmb in
-	 * nvgpu_channel_is_prealloc_enabled()
-	 */
-	nvgpu_smp_wmb();
-	c->joblist.pre_alloc.enabled = false;
-#endif
+	if (c->joblist.pre_alloc.jobs != NULL) {
+		nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
+		c->joblist.pre_alloc.jobs = NULL;
+		nvgpu_fence_pool_free(c);
+	}
 }
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 17be7de00..d77550af5 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -90,6 +90,11 @@ static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
 
 	*post_fence = nvgpu_fence_alloc(c);
 	if (*post_fence == NULL) {
+		/*
+		 * The fence pool is exactly as long as the job list so this
+		 * should always succeed. If not, things are so broken that
+		 * ENOMEM is better than ENOSPC.
+		 */
 		return -ENOMEM;
 	}
 
@@ -351,7 +356,9 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
 	struct nvgpu_channel_job *job = NULL;
 	int err;
 
+	nvgpu_channel_joblist_lock(c);
 	err = nvgpu_channel_alloc_job(c, &job);
+	nvgpu_channel_joblist_unlock(c);
 	if (err != 0) {
 		return err;
 	}
@@ -572,11 +579,6 @@ static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
 		return -EINVAL;
 	}
-	/* dynamic job allocation wouldn't be deterministic */
-	if (!nvgpu_channel_is_prealloc_enabled(c)) {
-		return -EINVAL;
-	}
-
 	/* dynamic sync allocation wouldn't be deterministic */
 	if (g->aggressive_sync_destroy_thresh != 0U) {
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 0186e4898..2805eca3d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -256,7 +256,6 @@ struct notification {
 
 struct nvgpu_channel_joblist {
 	struct {
-		bool enabled;
 		unsigned int length;
 		unsigned int put;
 		unsigned int get;
@@ -264,11 +263,6 @@ struct nvgpu_channel_joblist {
 		struct nvgpu_mutex read_lock;
 	} pre_alloc;
 
-	struct {
-		struct nvgpu_list_node jobs;
-		struct nvgpu_spinlock lock;
-	} dynamic;
-
 	/**
 	 * Synchronize abort cleanup (when closing a channel) and job cleanup
 	 * (asynchronously from worker) - protect from concurrent access when
@@ -588,7 +582,6 @@ int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
 int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch);
 #endif
-bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c);
 bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch,
 		u32 timeout_delta_ms, bool *progress);
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/job.h b/drivers/gpu/nvgpu/include/nvgpu/job.h
index 42181ea43..85ac4d12a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/job.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/job.h
@@ -54,7 +54,7 @@ void channel_joblist_delete(struct nvgpu_channel *c,
 		struct nvgpu_channel_job *job);
 bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c);
 
-int channel_prealloc_resources(struct nvgpu_channel *ch, u32 num_jobs);
+int channel_prealloc_resources(struct nvgpu_channel *c, u32 num_jobs);
 void channel_free_prealloc_resources(struct nvgpu_channel *c);
 
 #endif
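
For reference, the pre-allocated joblist that remains after this change is a
classic one-slot-reserved ring buffer: empty when get == put, full when
(put + 1) % length == get, so an allocation of length N holds at most N - 1
jobs. That is why channel_prealloc_resources() now stores length as
num_jobs + 1 and why the gpfifo must have at least two entries. The standalone
sketch below demonstrates only that indexing; the names and the printf-based
demo are illustrative and are not part of nvgpu.

#include <stdbool.h>
#include <stdio.h>

struct ring {
	unsigned int length;	/* allocation size; capacity is length - 1 */
	unsigned int put;	/* next slot to fill */
	unsigned int get;	/* oldest occupied slot */
};

static bool ring_is_empty(const struct ring *r)
{
	return r->get == r->put;
}

static bool ring_is_full(const struct ring *r)
{
	return (r->put + 1U) % r->length == r->get;
}

/* mirrors channel_joblist_add(): advance put after filling slot r->put */
static void ring_push(struct ring *r)
{
	r->put = (r->put + 1U) % r->length;
}

/* mirrors channel_joblist_delete(): advance get after consuming slot r->get */
static void ring_pop(struct ring *r)
{
	r->get = (r->get + 1U) % r->length;
}

int main(void)
{
	/* num_jobs == 3 requested -> allocate length = num_jobs + 1 = 4 */
	struct ring r = { .length = 4U, .put = 0U, .get = 0U };
	unsigned int pushed = 0U;

	while (!ring_is_full(&r)) {
		ring_push(&r);
		pushed++;
	}
	/* exactly num_jobs (3) slots fit before the ring reports full */
	printf("pushed %u, empty=%d full=%d\n",
	       pushed, ring_is_empty(&r), ring_is_full(&r));

	while (!ring_is_empty(&r)) {
		ring_pop(&r);
	}
	printf("drained, empty=%d\n", ring_is_empty(&r));
	return 0;
}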