From f6c96f620ffa6133656310a1415d710e56fb46d0 Mon Sep 17 00:00:00 2001
From: Debarshi Dutta
Date: Mon, 24 Jun 2019 15:17:47 +0530
Subject: [PATCH] gpu: nvgpu: add CONFIG_NVGPU_KERNEL_MODE_SUBMIT flag

The following functions belong to the kernel-mode submit path, and the
flag CONFIG_NVGPU_KERNEL_MODE_SUBMIT is used to compile them out of
safety builds.

channel_gk20a_alloc_priv_cmdbuf
channel_gk20a_free_prealloc_resources
channel_gk20a_joblist_add
channel_gk20a_joblist_delete
channel_gk20a_joblist_peek
channel_gk20a_prealloc_resources
nvgpu_channel
nvgpu_channel_add_job
nvgpu_channel_alloc_job
nvgpu_channel_alloc_priv_cmdbuf
nvgpu_channel_clean_up_jobs
nvgpu_channel_free_job
nvgpu_channel_free_priv_cmd_entry
nvgpu_channel_free_priv_cmd_q
nvgpu_channel_from_worker_item
nvgpu_channel_get_gpfifo_free_count
nvgpu_channel_is_prealloc_enabled
nvgpu_channel_joblist_is_empty
nvgpu_channel_joblist_lock
nvgpu_channel_joblist_unlock
nvgpu_channel_kernelmode_deinit
nvgpu_channel_poll_wdt
nvgpu_channel_set_syncpt
nvgpu_channel_setup_kernelmode
nvgpu_channel_sync_get_ref
nvgpu_channel_sync_incr
nvgpu_channel_sync_incr_user
nvgpu_channel_sync_put_ref_and_check
nvgpu_channel_sync_wait_fence_fd
nvgpu_channel_update
nvgpu_channel_update_gpfifo_get_and_get_free_count
nvgpu_channel_update_priv_cmd_q_and_free_entry
nvgpu_channel_wdt_continue
nvgpu_channel_wdt_handler
nvgpu_channel_wdt_init
nvgpu_channel_wdt_restart_all_channels
nvgpu_channel_wdt_rewind
nvgpu_channel_wdt_start
nvgpu_channel_wdt_stop
nvgpu_channel_worker_deinit
nvgpu_channel_worker_from_worker
nvgpu_channel_worker_init
nvgpu_channel_worker_poll_init
nvgpu_channel_worker_poll_wakeup_post_process_item
nvgpu_channel_worker_poll_wakeup_process_item
nvgpu_submit_channel_gpfifo_kernel
nvgpu_submit_channel_gpfifo_user
gk20a_userd_gp_get
gk20a_userd_pb_get
gk20a_userd_gp_put
nvgpu_fence_alloc

The following members of struct nvgpu_channel are compiled out of the
safety build:

struct gpfifo_desc gpfifo;
struct priv_cmd_queue priv_cmd_q;
struct nvgpu_channel_sync *sync;
struct nvgpu_list_node worker_item;
struct nvgpu_channel_wdt wdt;

The following files are compiled out of the safety build:

common/fifo/submit.c
common/sync/channel_sync_semaphore.c
hal/fifo/userd_gv11b.c
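As a minimal sketch of the guard pattern applied throughout this change
(illustrative only, not part of the diff below; the function name is
modeled on channel.c but the bodies are placeholders): the full
kernel-mode cleanup is compiled only when CONFIG_NVGPU_KERNEL_MODE_SUBMIT
is defined, while safety builds get a reduced fallback.

    /*
     * Standalone sketch of the compile-time guard. Build with
     * -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT for the kernel-mode submit
     * variant; omit it for the safety-build variant.
     */
    #include <stdio.h>

    #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
    static void channel_abort_clean_up(void)
    {
            /* full cleanup: joblist, priv cmdbufs, sync refs, watchdog */
            printf("kernel-mode submit build: full job cleanup\n");
    }
    #else
    static void channel_abort_clean_up(void)
    {
            /* safety build: only force the user sync to a safe state */
            printf("safety build: set user sync to safe state only\n");
    }
    #endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */

    int main(void)
    {
            channel_abort_clean_up();
            return 0;
    }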
Jira NVGPU-3479

Change-Id: If46c936477c6698f4bec3cab93906aaacb0ceabf
Signed-off-by: Debarshi Dutta
Reviewed-on: https://git-master.nvidia.com/r/2127212
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/Makefile                    |    1 +
 drivers/gpu/nvgpu/Makefile.shared.configs     |    5 +
 drivers/gpu/nvgpu/Makefile.sources            |    9 +-
 drivers/gpu/nvgpu/common/fence/fence.c        |    2 +
 drivers/gpu/nvgpu/common/fifo/channel.c       | 1638 +++++++++--------
 drivers/gpu/nvgpu/common/fifo/fifo.c          |    6 +
 drivers/gpu/nvgpu/common/fifo/tsg.c           |    2 +
 drivers/gpu/nvgpu/common/sync/channel_sync.c  |   26 +-
 .../gpu/nvgpu/common/sync/channel_sync_priv.h |    3 +-
 .../nvgpu/common/sync/channel_sync_syncpt.c   |   19 +-
 .../gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c    |    4 +
 .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c  |    2 +
 .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c  |    2 +
 .../gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c  |    3 +-
 .../gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c  |    2 +
 drivers/gpu/nvgpu/hal/fifo/userd_gk20a.c      |    2 +
 drivers/gpu/nvgpu/hal/fifo/userd_gk20a.h      |    2 +
 drivers/gpu/nvgpu/hal/init/hal_gm20b.c        |    6 +-
 drivers/gpu/nvgpu/hal/init/hal_gp10b.c        |    8 +-
 drivers/gpu/nvgpu/hal/init/hal_gv11b.c        |    6 +-
 drivers/gpu/nvgpu/hal/init/hal_tu104.c        |    6 +-
 drivers/gpu/nvgpu/include/nvgpu/channel.h     |  143 +-
 .../gpu/nvgpu/include/nvgpu/channel_sync.h    |   24 +-
 .../include/nvgpu/channel_sync_semaphore.h    |    4 +
 drivers/gpu/nvgpu/include/nvgpu/fence.h       |    2 +
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h       |    6 +
 drivers/gpu/nvgpu/include/nvgpu/tsg.h         |    2 +
 27 files changed, 1031 insertions(+), 904 deletions(-)

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index cc92bdca8..940b5c9b9 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -40,6 +40,7 @@ ccflags-y += -DCONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
 ccflags-y += -DCONFIG_NVGPU_CHANNEL_TSG_CONTROL
 ccflags-y += -DCONFIG_NVGPU_POWER_PG
 ccflags-y += -DCONFIG_NVGPU_CE
+ccflags-y += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
 ccflags-y += -DCONFIG_NVGPU_COMPRESSION
 ccflags-y += -DCONFIG_NVGPU_SIM
 ccflags-y += -DCONFIG_NVGPU_TRACE
diff --git a/drivers/gpu/nvgpu/Makefile.shared.configs b/drivers/gpu/nvgpu/Makefile.shared.configs
index 34db9c8bc..985466d0a 100644
--- a/drivers/gpu/nvgpu/Makefile.shared.configs
+++ b/drivers/gpu/nvgpu/Makefile.shared.configs
@@ -79,6 +79,11 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USERD
 # Enable Channel WDT for safety build until we switch to user mode submits only
 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_WDT
 
+# Enable Kernel Mode submit for safety build until we switch to user mode
+# submits only
+CONFIG_NVGPU_KERNEL_MODE_SUBMIT := 1
+NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
+
 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FIFO_ENGINE_ACTIVITY
 
 # Enable Channel/TSG Scheduling for safety build until devctl whitelisting is done
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 41b92069c..b43a5ef29 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -125,7 +125,6 @@ srcs += common/utils/enabled.c \
 	common/ptimer/ptimer.c \
 	common/sync/channel_sync.c \
 	common/sync/channel_sync_syncpt.c \
-	common/sync/channel_sync_semaphore.c \
 	common/semaphore/semaphore_sea.c \
 	common/semaphore/semaphore_pool.c \
 	common/semaphore/semaphore_hw.c \
@@ -137,7 +136,6 @@ srcs += common/utils/enabled.c \
 	common/rc/rc.c \
 	common/fifo/fifo.c \
common/fifo/pbdma.c \ - common/fifo/submit.c \ common/fifo/tsg.c \ common/fifo/runlist.c \ common/fifo/engine_status.c \ @@ -236,7 +234,6 @@ srcs += common/utils/enabled.c \ hal/fifo/tsg_gk20a.c \ hal/fifo/tsg_gv11b.c \ hal/fifo/userd_gk20a.c \ - hal/fifo/userd_gv11b.c \ hal/fifo/fifo_intr_gk20a.c \ hal/fifo/fifo_intr_gv11b.c \ hal/fifo/mmu_fault_gk20a.c \ @@ -332,6 +329,12 @@ ifeq ($(CONFIG_NVGPU_CE),1) srcs += common/ce/ce.c endif +ifeq ($(CONFIG_NVGPU_KERNEL_MODE_SUBMIT),1) +srcs += common/fifo/submit.c \ + common/sync/channel_sync_semaphore.c \ + hal/fifo/userd_gv11b.c +endif + ifeq ($(CONFIG_NVGPU_FECS_TRACE),1) srcs += common/gr/fecs_trace.c \ hal/gr/fecs_trace/fecs_trace_gm20b.c \ diff --git a/drivers/gpu/nvgpu/common/fence/fence.c b/drivers/gpu/nvgpu/common/fence/fence.c index 76f7f48fa..8c827ab5a 100644 --- a/drivers/gpu/nvgpu/common/fence/fence.c +++ b/drivers/gpu/nvgpu/common/fence/fence.c @@ -156,6 +156,7 @@ void nvgpu_fence_pool_free(struct nvgpu_channel *ch) } } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch) { struct nvgpu_fence_type *fence = NULL; @@ -183,6 +184,7 @@ struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch) return fence; } +#endif void nvgpu_fence_init(struct nvgpu_fence_type *f, const struct nvgpu_fence_ops *ops, diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index e4a5d1469..f30043a77 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -61,18 +61,21 @@ static void free_channel(struct nvgpu_fifo *f, struct nvgpu_channel *ch); static void gk20a_channel_dump_ref_actions(struct nvgpu_channel *ch); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT static void nvgpu_channel_free_priv_cmd_q(struct nvgpu_channel *ch); - static void channel_gk20a_free_prealloc_resources(struct nvgpu_channel *c); - static void channel_gk20a_joblist_add(struct nvgpu_channel *c, struct nvgpu_channel_job *job); static void channel_gk20a_joblist_delete(struct nvgpu_channel *c, struct nvgpu_channel_job *job); static struct nvgpu_channel_job *channel_gk20a_joblist_peek( struct nvgpu_channel *c); - static const struct nvgpu_worker_ops channel_worker_ops; +#endif + +static int nvgpu_channel_setup_ramfc(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args, + u64 gpfifo_gpu_va, u32 gpfifo_size); /* allocate GPU channel */ static struct nvgpu_channel *allocate_channel(struct nvgpu_fifo *f) @@ -174,6 +177,7 @@ int nvgpu_channel_disable_tsg(struct gk20a *g, struct nvgpu_channel *ch) } } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch) { /* synchronize with actual job cleanup */ @@ -198,67 +202,6 @@ void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch) nvgpu_channel_update(ch); } -void nvgpu_channel_set_unserviceable(struct nvgpu_channel *ch) -{ - nvgpu_spinlock_acquire(&ch->unserviceable_lock); - ch->unserviceable = true; - nvgpu_spinlock_release(&ch->unserviceable_lock); -} - -bool nvgpu_channel_check_unserviceable(struct nvgpu_channel *ch) -{ - bool unserviceable_status; - - nvgpu_spinlock_acquire(&ch->unserviceable_lock); - unserviceable_status = ch->unserviceable; - nvgpu_spinlock_release(&ch->unserviceable_lock); - - return unserviceable_status; -} - -void nvgpu_channel_abort(struct nvgpu_channel *ch, bool channel_preempt) -{ - struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); - - nvgpu_log_fn(ch->g, " "); - - if (tsg != NULL) { - return nvgpu_tsg_abort(ch->g, tsg, 
channel_preempt); - } else { - nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid); - } -} - -void nvgpu_channel_wait_until_counter_is_N( - struct nvgpu_channel *ch, nvgpu_atomic_t *counter, int wait_value, - struct nvgpu_cond *c, const char *caller, const char *counter_name) -{ - while (true) { - if (NVGPU_COND_WAIT( - c, - nvgpu_atomic_read(counter) == wait_value, - 5000U) == 0) { - break; - } - - nvgpu_warn(ch->g, - "%s: channel %d, still waiting, %s left: %d, waiting for: %d", - caller, ch->chid, counter_name, - nvgpu_atomic_read(counter), wait_value); - - gk20a_channel_dump_ref_actions(ch); - } -} - -static void nvgpu_channel_usermode_deinit(struct nvgpu_channel *ch) -{ - struct gk20a *g = ch->g; - - nvgpu_channel_free_usermode_buffers(ch); - (void) nvgpu_userd_init_channel(g, ch); - ch->usermode_submit_enabled = false; -} - static void nvgpu_channel_kernelmode_deinit(struct nvgpu_channel *ch) { struct vm_gk20a *ch_vm = ch->vm; @@ -285,497 +228,6 @@ static void nvgpu_channel_kernelmode_deinit(struct nvgpu_channel *ch) nvgpu_mutex_release(&ch->sync_lock); } -/* call ONLY when no references to the channel exist: after the last put */ -static void gk20a_free_channel(struct nvgpu_channel *ch, bool force) -{ - struct gk20a *g = ch->g; - struct nvgpu_tsg *tsg; - struct nvgpu_fifo *f = &g->fifo; - struct vm_gk20a *ch_vm = ch->vm; - unsigned long timeout; -#ifdef CONFIG_NVGPU_DEBUGGER - struct dbg_session_gk20a *dbg_s; - struct dbg_session_data *session_data, *tmp_s; - struct dbg_session_channel_data *ch_data, *tmp; - bool deferred_reset_pending; -#endif - int err; - - if (g == NULL) { - nvgpu_do_assert_print(g, "ch already freed"); - return; - } - - nvgpu_log_fn(g, " "); - - timeout = nvgpu_get_poll_timeout(g); - -#ifdef CONFIG_NVGPU_TRACE - trace_gk20a_free_channel(ch->chid); -#endif - - /* - * Disable channel/TSG and unbind here. This should not be executed if - * HW access is not available during shutdown/removal path as it will - * trigger a timeout - */ - if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { - /* abort channel and remove from runlist */ - tsg = nvgpu_tsg_from_ch(ch); - if (tsg != NULL) { - /* Between tsg is not null and unbind_channel call, - * ioctl cannot be called anymore because user doesn't - * have an open channel fd anymore to use for the unbind - * ioctl. - */ - err = nvgpu_tsg_unbind_channel(tsg, ch); - if (err != 0) { - nvgpu_err(g, - "failed to unbind channel %d from TSG", - ch->chid); - } - } else { - /* - * Channel is already unbound from TSG by User with - * explicit call - * Nothing to do here in that case - */ - } - } - - /* - * OS channel close may require that syncpoint should be set to some - * safe value before it is called. nvgpu_tsg_unbind_channel(above) is - * internally doing that by calling nvgpu_nvhost_syncpt_set_safe_state - * deep down in the stack. Otherwise os_channel close may block if the - * app is killed abruptly (which was going to do the syncpoint signal). 
- */ - if (g->os_channel.close != NULL) { - g->os_channel.close(ch, force); - } - - /* wait until there's only our ref to the channel */ - if (!force) { - nvgpu_channel_wait_until_counter_is_N( - ch, &ch->ref_count, 1, &ch->ref_count_dec_wq, - __func__, "references"); - } - - /* wait until all pending interrupts for recently completed - * jobs are handled */ - nvgpu_wait_for_deferred_interrupts(g); - - /* prevent new refs */ - nvgpu_spinlock_acquire(&ch->ref_obtain_lock); - if (!ch->referenceable) { - nvgpu_spinlock_release(&ch->ref_obtain_lock); - nvgpu_err(ch->g, - "Extra %s() called to channel %u", - __func__, ch->chid); - return; - } - ch->referenceable = false; - nvgpu_spinlock_release(&ch->ref_obtain_lock); - - /* matches with the initial reference in gk20a_open_new_channel() */ - nvgpu_atomic_dec(&ch->ref_count); - - /* wait until no more refs to the channel */ - if (!force) { - nvgpu_channel_wait_until_counter_is_N( - ch, &ch->ref_count, 0, &ch->ref_count_dec_wq, - __func__, "references"); - } - -#ifdef CONFIG_NVGPU_DEBUGGER - /* if engine reset was deferred, perform it now */ - nvgpu_mutex_acquire(&f->deferred_reset_mutex); - deferred_reset_pending = g->fifo.deferred_reset_pending; - nvgpu_mutex_release(&f->deferred_reset_mutex); - - if (deferred_reset_pending) { - nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" - " deferred, running now"); - nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); - - nvgpu_assert(nvgpu_channel_deferred_reset_engines(g, ch) == 0); - - nvgpu_mutex_release(&g->fifo.engines_reset_mutex); - } -#endif - - if (!nvgpu_channel_as_bound(ch)) { - goto unbind; - } - - nvgpu_log_info(g, "freeing bound channel context, timeout=%ld", - timeout); - -#ifdef CONFIG_NVGPU_FECS_TRACE - if (g->ops.gr.fecs_trace.unbind_channel && !ch->vpr) - g->ops.gr.fecs_trace.unbind_channel(g, &ch->inst_block); -#endif - - if (g->ops.gr.setup.free_subctx != NULL) { - g->ops.gr.setup.free_subctx(ch); - ch->subctx = NULL; - } - - g->ops.gr.intr.flush_channel_tlb(g); - - if (ch->usermode_submit_enabled) { - nvgpu_channel_usermode_deinit(ch); - } else { - nvgpu_channel_kernelmode_deinit(ch); - } - - if (ch->user_sync != NULL) { - /* - * Set user managed syncpoint to safe state - * But it's already done if channel is recovered - */ - if (nvgpu_channel_check_unserviceable(ch)) { - nvgpu_channel_sync_destroy(ch->user_sync, false); - } else { - nvgpu_channel_sync_destroy(ch->user_sync, true); - } - ch->user_sync = NULL; - } - nvgpu_mutex_release(&ch->sync_lock); - - /* - * free the channel used semaphore index. - * we need to do this before releasing the address space, - * as the semaphore pool might get freed after that point. - */ - if (ch->hw_sema != NULL) { - nvgpu_hw_semaphore_free(ch); - } - - /* - * When releasing the channel we unbind the VM - so release the ref. 
- */ - nvgpu_vm_put(ch_vm); - - /* make sure we don't have deferred interrupts pending that - * could still touch the channel */ - nvgpu_wait_for_deferred_interrupts(g); - -unbind: - g->ops.channel.unbind(ch); - g->ops.channel.free_inst(g, ch); - - /* put back the channel-wide submit ref from init */ - if (ch->deterministic) { - nvgpu_rwsem_down_read(&g->deterministic_busy); - ch->deterministic = false; - if (!ch->deterministic_railgate_allowed) { - gk20a_idle(g); - } - ch->deterministic_railgate_allowed = false; - - nvgpu_rwsem_up_read(&g->deterministic_busy); - } - - ch->vpr = false; - ch->vm = NULL; - - WARN_ON(ch->sync != NULL); - -#ifdef CONFIG_NVGPU_DEBUGGER - /* unlink all debug sessions */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - nvgpu_list_for_each_entry_safe(session_data, tmp_s, - &ch->dbg_s_list, dbg_session_data, dbg_s_entry) { - dbg_s = session_data->dbg_s; - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, - dbg_session_channel_data, ch_entry) { - if (ch_data->chid == ch->chid) { - if (ch_data->unbind_single_channel(dbg_s, - ch_data) != 0) { - nvgpu_err(g, - "unbind failed for chid: %d", - ch_data->chid); - } - } - } - nvgpu_mutex_release(&dbg_s->ch_list_lock); - } - - nvgpu_mutex_release(&g->dbg_sessions_lock); -#endif - -#if GK20A_CHANNEL_REFCOUNT_TRACKING - (void) memset(ch->ref_actions, 0, sizeof(ch->ref_actions)); - ch->ref_actions_put = 0; -#endif - - /* make sure we catch accesses of unopened channels in case - * there's non-refcounted channel pointers hanging around */ - ch->g = NULL; - nvgpu_smp_wmb(); - - /* ALWAYS last */ - free_channel(f, ch); -} - -static void gk20a_channel_dump_ref_actions(struct nvgpu_channel *ch) -{ -#if GK20A_CHANNEL_REFCOUNT_TRACKING - size_t i, get; - s64 now = nvgpu_current_time_ms(); - s64 prev = 0; - struct gk20a *g = ch->g; - - nvgpu_spinlock_acquire(&ch->ref_actions_lock); - - nvgpu_info(g, "ch %d: refs %d. Actions, most recent last:", - ch->chid, nvgpu_atomic_read(&ch->ref_count)); - - /* start at the oldest possible entry. put is next insertion point */ - get = ch->ref_actions_put; - - /* - * If the buffer is not full, this will first loop to the oldest entry, - * skipping not-yet-initialized entries. There is no ref_actions_get. - */ - for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) { - struct nvgpu_channel_ref_action *act = &ch->ref_actions[get]; - - if (act->trace.nr_entries) { - nvgpu_info(g, - "%s ref %zu steps ago (age %lld ms, diff %lld ms)", - act->type == channel_gk20a_ref_action_get - ? 
"GET" : "PUT", - GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i, - now - act->timestamp_ms, - act->timestamp_ms - prev); - - print_stack_trace(&act->trace, 0); - prev = act->timestamp_ms; - } - - get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; - } - - nvgpu_spinlock_release(&ch->ref_actions_lock); -#endif -} - -static void gk20a_channel_save_ref_source(struct nvgpu_channel *ch, - enum nvgpu_channel_ref_action_type type) -{ -#if GK20A_CHANNEL_REFCOUNT_TRACKING - struct nvgpu_channel_ref_action *act; - - nvgpu_spinlock_acquire(&ch->ref_actions_lock); - - act = &ch->ref_actions[ch->ref_actions_put]; - act->type = type; - act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN; - act->trace.nr_entries = 0; - act->trace.skip = 3; /* onwards from the caller of this */ - act->trace.entries = act->trace_entries; - save_stack_trace(&act->trace); - act->timestamp_ms = nvgpu_current_time_ms(); - ch->ref_actions_put = (ch->ref_actions_put + 1) % - GK20A_CHANNEL_REFCOUNT_TRACKING; - - nvgpu_spinlock_release(&ch->ref_actions_lock); -#endif -} - -/* Try to get a reference to the channel. Return nonzero on success. If fails, - * the channel is dead or being freed elsewhere and you must not touch it. - * - * Always when a nvgpu_channel pointer is seen and about to be used, a - * reference must be held to it - either by you or the caller, which should be - * documented well or otherwise clearly seen. This usually boils down to the - * file from ioctls directly, or an explicit get in exception handlers when the - * channel is found by a chid. - * - * Most global functions in this file require a reference to be held by the - * caller. - */ -struct nvgpu_channel *nvgpu_channel_get__func(struct nvgpu_channel *ch, - const char *caller) -{ - struct nvgpu_channel *ret; - - nvgpu_spinlock_acquire(&ch->ref_obtain_lock); - - if (likely(ch->referenceable)) { - gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); - nvgpu_atomic_inc(&ch->ref_count); - ret = ch; - } else { - ret = NULL; - } - - nvgpu_spinlock_release(&ch->ref_obtain_lock); - - if (ret != NULL) { - trace_nvgpu_channel_get(ch->chid, caller); - } - - return ret; -} - -void nvgpu_channel_put__func(struct nvgpu_channel *ch, const char *caller) -{ - gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put); - trace_nvgpu_channel_put(ch->chid, caller); - nvgpu_atomic_dec(&ch->ref_count); - if (nvgpu_cond_broadcast(&ch->ref_count_dec_wq) != 0) { - nvgpu_warn(ch->g, "failed to broadcast"); - } - - /* More puts than gets. Channel is probably going to get - * stuck. */ - WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0); - - /* Also, more puts than gets. ref_count can go to 0 only if - * the channel is closing. Channel is probably going to get - * stuck. */ - WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable); -} - -struct nvgpu_channel *nvgpu_channel_from_id__func(struct gk20a *g, - u32 chid, const char *caller) -{ - if (chid == NVGPU_INVALID_CHANNEL_ID) { - return NULL; - } - - return nvgpu_channel_get__func(&g->fifo.channel[chid], caller); -} - -void nvgpu_channel_close(struct nvgpu_channel *ch) -{ - gk20a_free_channel(ch, false); -} - -/* - * Be careful with this - it is meant for terminating channels when we know the - * driver is otherwise dying. Ref counts and the like are ignored by this - * version of the cleanup. 
- */ -void nvgpu_channel_kill(struct nvgpu_channel *ch) -{ - gk20a_free_channel(ch, true); -} - -struct nvgpu_channel *gk20a_open_new_channel(struct gk20a *g, - u32 runlist_id, - bool is_privileged_channel, - pid_t pid, pid_t tid) -{ - struct nvgpu_fifo *f = &g->fifo; - struct nvgpu_channel *ch; - - /* compatibility with existing code */ - if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) { - runlist_id = nvgpu_engine_get_gr_runlist_id(g); - } - - nvgpu_log_fn(g, " "); - - ch = allocate_channel(f); - if (ch == NULL) { - /* TBD: we want to make this virtualizable */ - nvgpu_err(g, "out of hw chids"); - return NULL; - } - -#ifdef CONFIG_NVGPU_TRACE - trace_gk20a_open_new_channel(ch->chid); -#endif - - BUG_ON(ch->g != NULL); - ch->g = g; - - /* Runlist for the channel */ - ch->runlist_id = runlist_id; - - /* Channel privilege level */ - ch->is_privileged_channel = is_privileged_channel; - - ch->pid = tid; - ch->tgid = pid; /* process granularity for FECS traces */ - - if (nvgpu_userd_init_channel(g, ch) != 0) { - nvgpu_err(g, "userd init failed"); - goto clean_up; - } - - if (g->ops.channel.alloc_inst(g, ch) != 0) { - nvgpu_err(g, "inst allocation failed"); - goto clean_up; - } - - /* now the channel is in a limbo out of the free list but not marked as - * alive and used (i.e. get-able) yet */ - - /* By default, channel is regular (non-TSG) channel */ - ch->tsgid = NVGPU_INVALID_TSG_ID; - - /* clear ctxsw timeout counter and update timestamp */ - ch->ctxsw_timeout_accumulated_ms = 0; - ch->ctxsw_timeout_gpfifo_get = 0; - /* set gr host default timeout */ - ch->ctxsw_timeout_max_ms = nvgpu_get_poll_timeout(g); - ch->ctxsw_timeout_debug_dump = true; - /* ch is unserviceable until it is bound to tsg */ - ch->unserviceable = true; - -#ifdef CONFIG_NVGPU_CHANNEL_WDT - /* init kernel watchdog timeout */ - ch->wdt.enabled = true; - ch->wdt.limit_ms = g->ch_wdt_init_limit_ms; - ch->wdt.debug_dump = true; -#endif - - ch->obj_class = 0; - ch->subctx_id = 0; - ch->runqueue_sel = 0; - - ch->mmu_nack_handled = false; - - /* The channel is *not* runnable at this point. It still needs to have - * an address space bound and allocate a gpfifo and grctx. */ - - if (nvgpu_cond_init(&ch->notifier_wq) != 0) { - nvgpu_err(g, "cond init failed"); - goto clean_up; - } - if (nvgpu_cond_init(&ch->semaphore_wq) != 0) { - nvgpu_err(g, "cond init failed"); - goto clean_up; - } - - /* Mark the channel alive, get-able, with 1 initial use - * references. The initial reference will be decreased in - * gk20a_free_channel(). - * - * Use the lock, since an asynchronous thread could - * try to access this channel while it's not fully - * initialized. - */ - nvgpu_spinlock_acquire(&ch->ref_obtain_lock); - ch->referenceable = true; - nvgpu_atomic_set(&ch->ref_count, 1); - nvgpu_spinlock_release(&ch->ref_obtain_lock); - - return ch; - -clean_up: - ch->g = NULL; - free_channel(f, ch); - return NULL; -} - /* allocate private cmd buffer. used for inserting commands before/after user submitted buffers. 
*/ static int channel_gk20a_alloc_priv_cmdbuf(struct nvgpu_channel *ch, @@ -1216,78 +668,6 @@ out: return err; } -static int nvgpu_channel_setup_ramfc(struct nvgpu_channel *c, - struct nvgpu_setup_bind_args *args, - u64 gpfifo_gpu_va, u32 gpfifo_size) -{ - int err = 0; - u64 pbdma_acquire_timeout = 0ULL; - struct gk20a *g = c->g; - -#ifdef CONFIG_NVGPU_CHANNEL_WDT - if (c->wdt.enabled && nvgpu_is_timeouts_enabled(c->g)) { - pbdma_acquire_timeout = c->wdt.limit_ms; - } -#else - if (nvgpu_is_timeouts_enabled(c->g)) { - pbdma_acquire_timeout = g->ch_wdt_init_limit_ms; - } -#endif - - err = g->ops.ramfc.setup(c, gpfifo_gpu_va, gpfifo_size, - pbdma_acquire_timeout, args->flags); - - return err; -} - -static int nvgpu_channel_setup_usermode(struct nvgpu_channel *c, - struct nvgpu_setup_bind_args *args) -{ - u32 gpfifo_size = args->num_gpfifo_entries; - int err = 0; - struct gk20a *g = c->g; - u64 gpfifo_gpu_va; - - if (g->os_channel.alloc_usermode_buffers != NULL) { - err = g->os_channel.alloc_usermode_buffers(c, args); - if (err != 0) { - nvgpu_err(g, "Usermode buffer alloc failed"); - goto clean_up; - } - c->userd_iova = nvgpu_mem_get_addr(g, - &c->usermode_userd); - c->usermode_submit_enabled = true; - } else { - nvgpu_err(g, "Usermode submit not supported"); - err = -EINVAL; - goto clean_up; - } - gpfifo_gpu_va = c->usermode_gpfifo.gpu_va; - - nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d", - c->chid, gpfifo_gpu_va, gpfifo_size); - - err = nvgpu_channel_setup_ramfc(c, args, gpfifo_gpu_va, gpfifo_size); - - if (err != 0) { - goto clean_up_unmap; - } - - err = nvgpu_channel_update_runlist(c, true); - if (err != 0) { - goto clean_up_unmap; - } - - return 0; - -clean_up_unmap: - nvgpu_channel_free_usermode_buffers(c); - (void) nvgpu_userd_init_channel(g, c); - c->usermode_submit_enabled = false; -clean_up: - return err; -} - static int nvgpu_channel_setup_kernelmode(struct nvgpu_channel *c, struct nvgpu_setup_bind_args *args) { @@ -1397,99 +777,6 @@ clean_up: } -int nvgpu_channel_setup_bind(struct nvgpu_channel *c, - struct nvgpu_setup_bind_args *args) -{ - struct gk20a *g = c->g; - int err = 0; - -#ifdef CONFIG_NVGPU_VPR - if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR) != 0U) { - c->vpr = true; - } -#else - c->vpr = false; -#endif - - if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U) { - nvgpu_rwsem_down_read(&g->deterministic_busy); - /* - * Railgating isn't deterministic; instead of disallowing - * railgating globally, take a power refcount for this - * channel's lifetime. The gk20a_idle() pair for this happens - * when the channel gets freed. - * - * Deterministic flag and this busy must be atomic within the - * busy lock. - */ - err = gk20a_busy(g); - if (err != 0) { - nvgpu_rwsem_up_read(&g->deterministic_busy); - return err; - } - - c->deterministic = true; - nvgpu_rwsem_up_read(&g->deterministic_busy); - } - - /* an address space needs to have been bound at this point. 
*/ - if (!nvgpu_channel_as_bound(c)) { - nvgpu_err(g, - "not bound to an address space at time of setup_bind"); - err = -EINVAL; - goto clean_up_idle; - } - - if (nvgpu_mem_is_valid(&c->gpfifo.mem) || - c->usermode_submit_enabled) { - nvgpu_err(g, "channel %d :" - "gpfifo already allocated", c->chid); - err = -EEXIST; - goto clean_up_idle; - } - - if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U) { - err = nvgpu_channel_setup_usermode(c, args); - } else { - if (g->os_channel.open != NULL) { - g->os_channel.open(c); - } - err = nvgpu_channel_setup_kernelmode(c, args); - } - - if (err != 0) { - goto clean_up_idle; - } - - g->ops.channel.bind(c); - - nvgpu_log_fn(g, "done"); - return 0; - -clean_up_idle: - if (c->deterministic) { - nvgpu_rwsem_down_read(&g->deterministic_busy); - gk20a_idle(g); - c->deterministic = false; - nvgpu_rwsem_up_read(&g->deterministic_busy); - } - nvgpu_err(g, "fail"); - return err; -} - -void nvgpu_channel_free_usermode_buffers(struct nvgpu_channel *c) -{ - if (nvgpu_mem_is_valid(&c->usermode_userd)) { - nvgpu_dma_free(c->g, &c->usermode_userd); - } - if (nvgpu_mem_is_valid(&c->usermode_gpfifo)) { - nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo); - } - if (c->g->os_channel.free_usermode_buffers != NULL) { - c->g->os_channel.free_usermode_buffers(c); - } -} - /* Update with this periodically to determine how the gpfifo is draining. */ static inline u32 nvgpu_channel_update_gpfifo_get(struct gk20a *g, struct nvgpu_channel *c) @@ -1506,79 +793,6 @@ u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch) ch->gpfifo.entry_num; } -static bool nvgpu_channel_ctxsw_timeout_debug_dump_state(struct gk20a *g, - struct nvgpu_channel *ch) -{ - bool verbose = false; - if (nvgpu_is_error_notifier_set(ch, - NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) { - verbose = ch->ctxsw_timeout_debug_dump; - } - - return verbose; -} - -static void nvgpu_channel_set_has_timedout_and_wakeup_wqs(struct gk20a *g, - struct nvgpu_channel *ch) -{ - /* mark channel as faulted */ - nvgpu_channel_set_unserviceable(ch); - - /* unblock pending waits */ - if (nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq) != 0) { - nvgpu_warn(g, "failed to broadcast"); - } - if (nvgpu_cond_broadcast_interruptible(&ch->notifier_wq) != 0) { - nvgpu_warn(g, "failed to broadcast"); - } -} - -bool nvgpu_channel_mark_error(struct gk20a *g, struct nvgpu_channel *ch) -{ - bool verbose; - - verbose = nvgpu_channel_ctxsw_timeout_debug_dump_state(g, ch); - nvgpu_channel_set_has_timedout_and_wakeup_wqs(g, ch); - - return verbose; -} - -void nvgpu_channel_set_error_notifier(struct gk20a *g, struct nvgpu_channel *ch, - u32 error_notifier) -{ - g->ops.channel.set_error_notifier(ch, error_notifier); -} - -void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g, - struct nvgpu_channel *ch) -{ - nvgpu_err(g, - "channel %d generated a mmu fault", ch->chid); - nvgpu_channel_set_error_notifier(g, ch, - NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); -} - -bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch, - u32 timeout_delta_ms, bool *progress) -{ - u32 gpfifo_get = nvgpu_channel_update_gpfifo_get(ch->g, ch); - - if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) { - /* didn't advance since previous ctxsw timeout check */ - ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms; - *progress = false; - } else { - /* first ctxsw timeout isr encountered */ - ch->ctxsw_timeout_accumulated_ms = timeout_delta_ms; - *progress = true; - } - - ch->ctxsw_timeout_gpfifo_get = gpfifo_get; - - return 
nvgpu_is_timeouts_enabled(ch->g) && - ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms; -} - u32 nvgpu_channel_update_gpfifo_get_and_get_free_count(struct nvgpu_channel *ch) { (void)nvgpu_channel_update_gpfifo_get(ch->g, ch); @@ -1834,7 +1048,7 @@ static void nvgpu_channel_poll_wdt(struct gk20a *g) } } -#endif +#endif /* CONFIG_NVGPU_CHANNEL_WDT */ static inline struct nvgpu_channel_worker * nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker) @@ -1889,7 +1103,7 @@ static u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout( return ch_worker->watchdog_interval; } -#endif +#endif /* CONFIG_NVGPU_CHANNEL_WDT */ static void nvgpu_channel_worker_poll_wakeup_process_item( struct nvgpu_list_node *work_item) @@ -2250,6 +1464,826 @@ void nvgpu_channel_update(struct nvgpu_channel *c) gk20a_channel_worker_enqueue(c); } +bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch, + u32 timeout_delta_ms, bool *progress) +{ + u32 gpfifo_get = nvgpu_channel_update_gpfifo_get(ch->g, ch); + + if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) { + /* didn't advance since previous ctxsw timeout check */ + ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms; + *progress = false; + } else { + /* first ctxsw timeout isr encountered */ + ch->ctxsw_timeout_accumulated_ms = timeout_delta_ms; + *progress = true; + } + + ch->ctxsw_timeout_gpfifo_get = gpfifo_get; + + return nvgpu_is_timeouts_enabled(ch->g) && + ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms; +} + +#else + +void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch) +{ + /* ensure no fences are pending */ + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->user_sync != NULL) { + nvgpu_channel_sync_set_safe_state(ch->user_sync); + } + nvgpu_mutex_release(&ch->sync_lock); +} + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + +void nvgpu_channel_set_unserviceable(struct nvgpu_channel *ch) +{ + nvgpu_spinlock_acquire(&ch->unserviceable_lock); + ch->unserviceable = true; + nvgpu_spinlock_release(&ch->unserviceable_lock); +} + +bool nvgpu_channel_check_unserviceable(struct nvgpu_channel *ch) +{ + bool unserviceable_status; + + nvgpu_spinlock_acquire(&ch->unserviceable_lock); + unserviceable_status = ch->unserviceable; + nvgpu_spinlock_release(&ch->unserviceable_lock); + + return unserviceable_status; +} + +void nvgpu_channel_abort(struct nvgpu_channel *ch, bool channel_preempt) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + + nvgpu_log_fn(ch->g, " "); + + if (tsg != NULL) { + return nvgpu_tsg_abort(ch->g, tsg, channel_preempt); + } else { + nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid); + } +} + +void nvgpu_channel_wait_until_counter_is_N( + struct nvgpu_channel *ch, nvgpu_atomic_t *counter, int wait_value, + struct nvgpu_cond *c, const char *caller, const char *counter_name) +{ + while (true) { + if (NVGPU_COND_WAIT( + c, + nvgpu_atomic_read(counter) == wait_value, + 5000U) == 0) { + break; + } + + nvgpu_warn(ch->g, + "%s: channel %d, still waiting, %s left: %d, waiting for: %d", + caller, ch->chid, counter_name, + nvgpu_atomic_read(counter), wait_value); + + gk20a_channel_dump_ref_actions(ch); + } +} + +static void nvgpu_channel_usermode_deinit(struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + + nvgpu_channel_free_usermode_buffers(ch); + (void) nvgpu_userd_init_channel(g, ch); + ch->usermode_submit_enabled = false; +} + +/* call ONLY when no references to the channel exist: after the last put */ +static void gk20a_free_channel(struct nvgpu_channel *ch, bool force) +{ + struct 
gk20a *g = ch->g; + struct nvgpu_tsg *tsg; + struct nvgpu_fifo *f = &g->fifo; + struct vm_gk20a *ch_vm = ch->vm; + unsigned long timeout; +#ifdef CONFIG_NVGPU_DEBUGGER + struct dbg_session_gk20a *dbg_s; + struct dbg_session_data *session_data, *tmp_s; + struct dbg_session_channel_data *ch_data, *tmp; + bool deferred_reset_pending; +#endif + int err; + + if (g == NULL) { + nvgpu_do_assert_print(g, "ch already freed"); + return; + } + + nvgpu_log_fn(g, " "); + + timeout = nvgpu_get_poll_timeout(g); + +#ifdef CONFIG_NVGPU_TRACE + trace_gk20a_free_channel(ch->chid); +#endif + + /* + * Disable channel/TSG and unbind here. This should not be executed if + * HW access is not available during shutdown/removal path as it will + * trigger a timeout + */ + if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + /* abort channel and remove from runlist */ + tsg = nvgpu_tsg_from_ch(ch); + if (tsg != NULL) { + /* Between tsg is not null and unbind_channel call, + * ioctl cannot be called anymore because user doesn't + * have an open channel fd anymore to use for the unbind + * ioctl. + */ + err = nvgpu_tsg_unbind_channel(tsg, ch); + if (err != 0) { + nvgpu_err(g, + "failed to unbind channel %d from TSG", + ch->chid); + } + } else { + /* + * Channel is already unbound from TSG by User with + * explicit call + * Nothing to do here in that case + */ + } + } + + /* + * OS channel close may require that syncpoint should be set to some + * safe value before it is called. nvgpu_tsg_unbind_channel(above) is + * internally doing that by calling nvgpu_nvhost_syncpt_set_safe_state + * deep down in the stack. Otherwise os_channel close may block if the + * app is killed abruptly (which was going to do the syncpoint signal). + */ + if (g->os_channel.close != NULL) { + g->os_channel.close(ch, force); + } + + /* wait until there's only our ref to the channel */ + if (!force) { + nvgpu_channel_wait_until_counter_is_N( + ch, &ch->ref_count, 1, &ch->ref_count_dec_wq, + __func__, "references"); + } + + /* wait until all pending interrupts for recently completed + * jobs are handled */ + nvgpu_wait_for_deferred_interrupts(g); + + /* prevent new refs */ + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + if (!ch->referenceable) { + nvgpu_spinlock_release(&ch->ref_obtain_lock); + nvgpu_err(ch->g, + "Extra %s() called to channel %u", + __func__, ch->chid); + return; + } + ch->referenceable = false; + nvgpu_spinlock_release(&ch->ref_obtain_lock); + + /* matches with the initial reference in gk20a_open_new_channel() */ + nvgpu_atomic_dec(&ch->ref_count); + + /* wait until no more refs to the channel */ + if (!force) { + nvgpu_channel_wait_until_counter_is_N( + ch, &ch->ref_count, 0, &ch->ref_count_dec_wq, + __func__, "references"); + } + +#ifdef CONFIG_NVGPU_DEBUGGER + /* if engine reset was deferred, perform it now */ + nvgpu_mutex_acquire(&f->deferred_reset_mutex); + deferred_reset_pending = g->fifo.deferred_reset_pending; + nvgpu_mutex_release(&f->deferred_reset_mutex); + + if (deferred_reset_pending) { + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" + " deferred, running now"); + nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); + + nvgpu_assert(nvgpu_channel_deferred_reset_engines(g, ch) == 0); + + nvgpu_mutex_release(&g->fifo.engines_reset_mutex); + } +#endif + + if (!nvgpu_channel_as_bound(ch)) { + goto unbind; + } + + nvgpu_log_info(g, "freeing bound channel context, timeout=%ld", + timeout); + +#ifdef CONFIG_NVGPU_FECS_TRACE + if (g->ops.gr.fecs_trace.unbind_channel && !ch->vpr) + 
g->ops.gr.fecs_trace.unbind_channel(g, &ch->inst_block); +#endif + + if (g->ops.gr.setup.free_subctx != NULL) { + g->ops.gr.setup.free_subctx(ch); + ch->subctx = NULL; + } + + g->ops.gr.intr.flush_channel_tlb(g); + + if (ch->usermode_submit_enabled) { + nvgpu_channel_usermode_deinit(ch); + } else { +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + nvgpu_channel_kernelmode_deinit(ch); +#endif + } + + if (ch->user_sync != NULL) { + /* + * Set user managed syncpoint to safe state + * But it's already done if channel is recovered + */ + if (nvgpu_channel_check_unserviceable(ch)) { + nvgpu_channel_sync_destroy(ch->user_sync, false); + } else { + nvgpu_channel_sync_destroy(ch->user_sync, true); + } + ch->user_sync = NULL; + } + nvgpu_mutex_release(&ch->sync_lock); + + /* + * free the channel used semaphore index. + * we need to do this before releasing the address space, + * as the semaphore pool might get freed after that point. + */ + if (ch->hw_sema != NULL) { + nvgpu_hw_semaphore_free(ch); + } + + /* + * When releasing the channel we unbind the VM - so release the ref. + */ + nvgpu_vm_put(ch_vm); + + /* make sure we don't have deferred interrupts pending that + * could still touch the channel */ + nvgpu_wait_for_deferred_interrupts(g); + +unbind: + g->ops.channel.unbind(ch); + g->ops.channel.free_inst(g, ch); + + /* put back the channel-wide submit ref from init */ + if (ch->deterministic) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + ch->deterministic = false; + if (!ch->deterministic_railgate_allowed) { + gk20a_idle(g); + } + ch->deterministic_railgate_allowed = false; + + nvgpu_rwsem_up_read(&g->deterministic_busy); + } + + ch->vpr = false; + ch->vm = NULL; + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + WARN_ON(ch->sync != NULL); +#endif + +#ifdef CONFIG_NVGPU_DEBUGGER + /* unlink all debug sessions */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + nvgpu_list_for_each_entry_safe(session_data, tmp_s, + &ch->dbg_s_list, dbg_session_data, dbg_s_entry) { + dbg_s = session_data->dbg_s; + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); + nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, + dbg_session_channel_data, ch_entry) { + if (ch_data->chid == ch->chid) { + if (ch_data->unbind_single_channel(dbg_s, + ch_data) != 0) { + nvgpu_err(g, + "unbind failed for chid: %d", + ch_data->chid); + } + } + } + nvgpu_mutex_release(&dbg_s->ch_list_lock); + } + + nvgpu_mutex_release(&g->dbg_sessions_lock); +#endif + +#if GK20A_CHANNEL_REFCOUNT_TRACKING + (void) memset(ch->ref_actions, 0, sizeof(ch->ref_actions)); + ch->ref_actions_put = 0; +#endif + + /* make sure we catch accesses of unopened channels in case + * there's non-refcounted channel pointers hanging around */ + ch->g = NULL; + nvgpu_smp_wmb(); + + /* ALWAYS last */ + free_channel(f, ch); +} + +static void gk20a_channel_dump_ref_actions(struct nvgpu_channel *ch) +{ +#if GK20A_CHANNEL_REFCOUNT_TRACKING + size_t i, get; + s64 now = nvgpu_current_time_ms(); + s64 prev = 0; + struct gk20a *g = ch->g; + + nvgpu_spinlock_acquire(&ch->ref_actions_lock); + + nvgpu_info(g, "ch %d: refs %d. Actions, most recent last:", + ch->chid, nvgpu_atomic_read(&ch->ref_count)); + + /* start at the oldest possible entry. put is next insertion point */ + get = ch->ref_actions_put; + + /* + * If the buffer is not full, this will first loop to the oldest entry, + * skipping not-yet-initialized entries. There is no ref_actions_get. 
+ */ + for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) { + struct nvgpu_channel_ref_action *act = &ch->ref_actions[get]; + + if (act->trace.nr_entries) { + nvgpu_info(g, + "%s ref %zu steps ago (age %lld ms, diff %lld ms)", + act->type == channel_gk20a_ref_action_get + ? "GET" : "PUT", + GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i, + now - act->timestamp_ms, + act->timestamp_ms - prev); + + print_stack_trace(&act->trace, 0); + prev = act->timestamp_ms; + } + + get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; + } + + nvgpu_spinlock_release(&ch->ref_actions_lock); +#endif +} + +static void gk20a_channel_save_ref_source(struct nvgpu_channel *ch, + enum nvgpu_channel_ref_action_type type) +{ +#if GK20A_CHANNEL_REFCOUNT_TRACKING + struct nvgpu_channel_ref_action *act; + + nvgpu_spinlock_acquire(&ch->ref_actions_lock); + + act = &ch->ref_actions[ch->ref_actions_put]; + act->type = type; + act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN; + act->trace.nr_entries = 0; + act->trace.skip = 3; /* onwards from the caller of this */ + act->trace.entries = act->trace_entries; + save_stack_trace(&act->trace); + act->timestamp_ms = nvgpu_current_time_ms(); + ch->ref_actions_put = (ch->ref_actions_put + 1) % + GK20A_CHANNEL_REFCOUNT_TRACKING; + + nvgpu_spinlock_release(&ch->ref_actions_lock); +#endif +} + +/* Try to get a reference to the channel. Return nonzero on success. If fails, + * the channel is dead or being freed elsewhere and you must not touch it. + * + * Always when a nvgpu_channel pointer is seen and about to be used, a + * reference must be held to it - either by you or the caller, which should be + * documented well or otherwise clearly seen. This usually boils down to the + * file from ioctls directly, or an explicit get in exception handlers when the + * channel is found by a chid. + * + * Most global functions in this file require a reference to be held by the + * caller. + */ +struct nvgpu_channel *nvgpu_channel_get__func(struct nvgpu_channel *ch, + const char *caller) +{ + struct nvgpu_channel *ret; + + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + + if (likely(ch->referenceable)) { + gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); + nvgpu_atomic_inc(&ch->ref_count); + ret = ch; + } else { + ret = NULL; + } + + nvgpu_spinlock_release(&ch->ref_obtain_lock); + + if (ret != NULL) { + trace_nvgpu_channel_get(ch->chid, caller); + } + + return ret; +} + +void nvgpu_channel_put__func(struct nvgpu_channel *ch, const char *caller) +{ + gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put); + trace_nvgpu_channel_put(ch->chid, caller); + nvgpu_atomic_dec(&ch->ref_count); + if (nvgpu_cond_broadcast(&ch->ref_count_dec_wq) != 0) { + nvgpu_warn(ch->g, "failed to broadcast"); + } + + /* More puts than gets. Channel is probably going to get + * stuck. */ + WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0); + + /* Also, more puts than gets. ref_count can go to 0 only if + * the channel is closing. Channel is probably going to get + * stuck. */ + WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable); +} + +struct nvgpu_channel *nvgpu_channel_from_id__func(struct gk20a *g, + u32 chid, const char *caller) +{ + if (chid == NVGPU_INVALID_CHANNEL_ID) { + return NULL; + } + + return nvgpu_channel_get__func(&g->fifo.channel[chid], caller); +} + +void nvgpu_channel_close(struct nvgpu_channel *ch) +{ + gk20a_free_channel(ch, false); +} + +/* + * Be careful with this - it is meant for terminating channels when we know the + * driver is otherwise dying. 
Ref counts and the like are ignored by this + * version of the cleanup. + */ +void nvgpu_channel_kill(struct nvgpu_channel *ch) +{ + gk20a_free_channel(ch, true); +} + +struct nvgpu_channel *gk20a_open_new_channel(struct gk20a *g, + u32 runlist_id, + bool is_privileged_channel, + pid_t pid, pid_t tid) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_channel *ch; + + /* compatibility with existing code */ + if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) { + runlist_id = nvgpu_engine_get_gr_runlist_id(g); + } + + nvgpu_log_fn(g, " "); + + ch = allocate_channel(f); + if (ch == NULL) { + /* TBD: we want to make this virtualizable */ + nvgpu_err(g, "out of hw chids"); + return NULL; + } + +#ifdef CONFIG_NVGPU_TRACE + trace_gk20a_open_new_channel(ch->chid); +#endif + + BUG_ON(ch->g != NULL); + ch->g = g; + + /* Runlist for the channel */ + ch->runlist_id = runlist_id; + + /* Channel privilege level */ + ch->is_privileged_channel = is_privileged_channel; + + ch->pid = tid; + ch->tgid = pid; /* process granularity for FECS traces */ + + if (nvgpu_userd_init_channel(g, ch) != 0) { + nvgpu_err(g, "userd init failed"); + goto clean_up; + } + + if (g->ops.channel.alloc_inst(g, ch) != 0) { + nvgpu_err(g, "inst allocation failed"); + goto clean_up; + } + + /* now the channel is in a limbo out of the free list but not marked as + * alive and used (i.e. get-able) yet */ + + /* By default, channel is regular (non-TSG) channel */ + ch->tsgid = NVGPU_INVALID_TSG_ID; + + /* clear ctxsw timeout counter and update timestamp */ + ch->ctxsw_timeout_accumulated_ms = 0; + ch->ctxsw_timeout_gpfifo_get = 0; + /* set gr host default timeout */ + ch->ctxsw_timeout_max_ms = nvgpu_get_poll_timeout(g); + ch->ctxsw_timeout_debug_dump = true; + /* ch is unserviceable until it is bound to tsg */ + ch->unserviceable = true; + +#ifdef CONFIG_NVGPU_CHANNEL_WDT + /* init kernel watchdog timeout */ + ch->wdt.enabled = true; + ch->wdt.limit_ms = g->ch_wdt_init_limit_ms; + ch->wdt.debug_dump = true; +#endif + + ch->obj_class = 0; + ch->subctx_id = 0; + ch->runqueue_sel = 0; + + ch->mmu_nack_handled = false; + + /* The channel is *not* runnable at this point. It still needs to have + * an address space bound and allocate a gpfifo and grctx. */ + + if (nvgpu_cond_init(&ch->notifier_wq) != 0) { + nvgpu_err(g, "cond init failed"); + goto clean_up; + } + if (nvgpu_cond_init(&ch->semaphore_wq) != 0) { + nvgpu_err(g, "cond init failed"); + goto clean_up; + } + + /* Mark the channel alive, get-able, with 1 initial use + * references. The initial reference will be decreased in + * gk20a_free_channel(). + * + * Use the lock, since an asynchronous thread could + * try to access this channel while it's not fully + * initialized. 
+ */ + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + ch->referenceable = true; + nvgpu_atomic_set(&ch->ref_count, 1); + nvgpu_spinlock_release(&ch->ref_obtain_lock); + + return ch; + +clean_up: + ch->g = NULL; + free_channel(f, ch); + return NULL; +} + +static int nvgpu_channel_setup_ramfc(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args, + u64 gpfifo_gpu_va, u32 gpfifo_size) +{ + int err = 0; + u64 pbdma_acquire_timeout = 0ULL; + struct gk20a *g = c->g; + +#ifdef CONFIG_NVGPU_CHANNEL_WDT + if (c->wdt.enabled && nvgpu_is_timeouts_enabled(c->g)) { + pbdma_acquire_timeout = c->wdt.limit_ms; + } +#else + if (nvgpu_is_timeouts_enabled(c->g)) { + pbdma_acquire_timeout = g->ch_wdt_init_limit_ms; + } +#endif + + err = g->ops.ramfc.setup(c, gpfifo_gpu_va, gpfifo_size, + pbdma_acquire_timeout, args->flags); + + return err; +} + +static int nvgpu_channel_setup_usermode(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args) +{ + u32 gpfifo_size = args->num_gpfifo_entries; + int err = 0; + struct gk20a *g = c->g; + u64 gpfifo_gpu_va; + + if (g->os_channel.alloc_usermode_buffers != NULL) { + err = g->os_channel.alloc_usermode_buffers(c, args); + if (err != 0) { + nvgpu_err(g, "Usermode buffer alloc failed"); + goto clean_up; + } + c->userd_iova = nvgpu_mem_get_addr(g, + &c->usermode_userd); + c->usermode_submit_enabled = true; + } else { + nvgpu_err(g, "Usermode submit not supported"); + err = -EINVAL; + goto clean_up; + } + gpfifo_gpu_va = c->usermode_gpfifo.gpu_va; + + nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d", + c->chid, gpfifo_gpu_va, gpfifo_size); + + err = nvgpu_channel_setup_ramfc(c, args, gpfifo_gpu_va, gpfifo_size); + + if (err != 0) { + goto clean_up_unmap; + } + + err = nvgpu_channel_update_runlist(c, true); + if (err != 0) { + goto clean_up_unmap; + } + + return 0; + +clean_up_unmap: + nvgpu_channel_free_usermode_buffers(c); + (void) nvgpu_userd_init_channel(g, c); + c->usermode_submit_enabled = false; +clean_up: + return err; +} + +int nvgpu_channel_setup_bind(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args) +{ + struct gk20a *g = c->g; + int err = 0; + +#ifdef CONFIG_NVGPU_VPR + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR) != 0U) { + c->vpr = true; + } +#else + c->vpr = false; +#endif + + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + /* + * Railgating isn't deterministic; instead of disallowing + * railgating globally, take a power refcount for this + * channel's lifetime. The gk20a_idle() pair for this happens + * when the channel gets freed. + * + * Deterministic flag and this busy must be atomic within the + * busy lock. + */ + err = gk20a_busy(g); + if (err != 0) { + nvgpu_rwsem_up_read(&g->deterministic_busy); + return err; + } + + c->deterministic = true; + nvgpu_rwsem_up_read(&g->deterministic_busy); + } + + /* an address space needs to have been bound at this point. 
*/ + if (!nvgpu_channel_as_bound(c)) { + nvgpu_err(g, + "not bound to an address space at time of setup_bind"); + err = -EINVAL; + goto clean_up_idle; + } + + if (c->usermode_submit_enabled) { + nvgpu_err(g, "channel %d : " + "usermode buffers allocated", c->chid); + err = -EEXIST; + goto clean_up_idle; + } + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + if (nvgpu_mem_is_valid(&c->gpfifo.mem)) { + nvgpu_err(g, "channel %d :" + "gpfifo already allocated", c->chid); + err = -EEXIST; + goto clean_up_idle; + } +#endif + + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U) { + err = nvgpu_channel_setup_usermode(c, args); + } else { +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + if (g->os_channel.open != NULL) { + g->os_channel.open(c); + } + err = nvgpu_channel_setup_kernelmode(c, args); +#else + err = -EINVAL; +#endif + } + + if (err != 0) { + goto clean_up_idle; + } + + g->ops.channel.bind(c); + + nvgpu_log_fn(g, "done"); + return 0; + +clean_up_idle: + if (c->deterministic) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + gk20a_idle(g); + c->deterministic = false; + nvgpu_rwsem_up_read(&g->deterministic_busy); + } + nvgpu_err(g, "fail"); + return err; +} + +void nvgpu_channel_free_usermode_buffers(struct nvgpu_channel *c) +{ + if (nvgpu_mem_is_valid(&c->usermode_userd)) { + nvgpu_dma_free(c->g, &c->usermode_userd); + } + if (nvgpu_mem_is_valid(&c->usermode_gpfifo)) { + nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo); + } + if (c->g->os_channel.free_usermode_buffers != NULL) { + c->g->os_channel.free_usermode_buffers(c); + } +} + +static bool nvgpu_channel_ctxsw_timeout_debug_dump_state(struct gk20a *g, + struct nvgpu_channel *ch) +{ + bool verbose = false; + if (nvgpu_is_error_notifier_set(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) { + verbose = ch->ctxsw_timeout_debug_dump; + } + + return verbose; +} + +static void nvgpu_channel_set_has_timedout_and_wakeup_wqs(struct gk20a *g, + struct nvgpu_channel *ch) +{ + /* mark channel as faulted */ + nvgpu_channel_set_unserviceable(ch); + + /* unblock pending waits */ + if (nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq) != 0) { + nvgpu_warn(g, "failed to broadcast"); + } + if (nvgpu_cond_broadcast_interruptible(&ch->notifier_wq) != 0) { + nvgpu_warn(g, "failed to broadcast"); + } +} + +bool nvgpu_channel_mark_error(struct gk20a *g, struct nvgpu_channel *ch) +{ + bool verbose; + + verbose = nvgpu_channel_ctxsw_timeout_debug_dump_state(g, ch); + nvgpu_channel_set_has_timedout_and_wakeup_wqs(g, ch); + + return verbose; +} + +void nvgpu_channel_set_error_notifier(struct gk20a *g, struct nvgpu_channel *ch, + u32 error_notifier) +{ + g->ops.channel.set_error_notifier(ch, error_notifier); +} + +void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g, + struct nvgpu_channel *ch) +{ + nvgpu_err(g, + "channel %d generated a mmu fault", ch->chid); + nvgpu_channel_set_error_notifier(g, ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); +} + /* * Stop deterministic channel activity for do_idle() when power needs to go off * momentarily but deterministic channels keep power refs for potentially a @@ -2334,8 +2368,10 @@ void nvgpu_channel_deterministic_unidle(struct gk20a *g) static void nvgpu_channel_destroy(struct gk20a *g, struct nvgpu_channel *c) { nvgpu_mutex_destroy(&c->ioctl_lock); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT nvgpu_mutex_destroy(&c->joblist.cleanup_lock); nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); +#endif nvgpu_mutex_destroy(&c->sync_lock); #if defined(CONFIG_NVGPU_CYCLESTATS) 
nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); @@ -2393,18 +2429,20 @@ int nvgpu_channel_init_support(struct gk20a *g, u32 chid) #if GK20A_CHANNEL_REFCOUNT_TRACKING nvgpu_spinlock_init(&c->ref_actions_lock); #endif - nvgpu_spinlock_init(&c->joblist.dynamic.lock); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT #ifdef CONFIG_NVGPU_CHANNEL_WDT nvgpu_spinlock_init(&c->wdt.lock); #endif - + nvgpu_spinlock_init(&c->joblist.dynamic.lock); nvgpu_init_list_node(&c->joblist.dynamic.jobs); - nvgpu_init_list_node(&c->dbg_s_list); nvgpu_init_list_node(&c->worker_item); - nvgpu_mutex_init(&c->ioctl_lock); nvgpu_mutex_init(&c->joblist.cleanup_lock); nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + nvgpu_init_list_node(&c->dbg_s_list); + nvgpu_mutex_init(&c->ioctl_lock); nvgpu_mutex_init(&c->sync_lock); #if defined(CONFIG_NVGPU_CYCLESTATS) nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); @@ -2593,6 +2631,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) } } #endif +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT /* * Only non-deterministic channels get the * channel_update callback. We don't allow @@ -2607,6 +2646,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) if (!c->deterministic) { nvgpu_channel_update(c); } +#endif } nvgpu_channel_put(c); } diff --git a/drivers/gpu/nvgpu/common/fifo/fifo.c b/drivers/gpu/nvgpu/common/fifo/fifo.c index 6b7cd5350..78184ecec 100644 --- a/drivers/gpu/nvgpu/common/fifo/fifo.c +++ b/drivers/gpu/nvgpu/common/fifo/fifo.c @@ -60,7 +60,9 @@ void nvgpu_fifo_cleanup_sw_common(struct gk20a *g) void nvgpu_fifo_cleanup_sw(struct gk20a *g) { +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT nvgpu_channel_worker_deinit(g); +#endif nvgpu_fifo_cleanup_sw_common(g); } @@ -169,19 +171,23 @@ int nvgpu_fifo_setup_sw(struct gk20a *g) return err; } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT err = nvgpu_channel_worker_init(g); if (err != 0) { nvgpu_err(g, "worker init fail, err=%d", err); goto clean_up; } +#endif f->sw_ready = true; nvgpu_log_fn(g, "done"); return 0; +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT clean_up: nvgpu_fifo_cleanup_sw_common(g); +#endif return err; } diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index d3ed866a4..86bdaec1e 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -460,6 +460,7 @@ void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_tsg *tsg) NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg, bool *debug_dump, u32 *ms) { @@ -522,6 +523,7 @@ bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg, nvgpu_rwsem_up_read(&tsg->ch_list_lock); return recover; } +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING int nvgpu_tsg_set_interleave(struct nvgpu_tsg *tsg, u32 level) diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync.c b/drivers/gpu/nvgpu/common/sync/channel_sync.c index 29e32a24d..ca867ec6a 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync.c @@ -45,7 +45,11 @@ struct nvgpu_channel_sync *nvgpu_channel_sync_create(struct nvgpu_channel *c, if (nvgpu_has_syncpoints(c->g)) { return nvgpu_channel_sync_syncpt_create(c, user_managed); } else { +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT return nvgpu_channel_sync_semaphore_create(c, user_managed); +#else + return NULL; +#endif } } @@ -64,6 +68,7 @@ bool nvgpu_has_syncpoints(struct 
gk20a *g) #endif } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd, struct priv_cmd_entry *entry, u32 max_wait_cmds) { @@ -91,6 +96,18 @@ void nvgpu_channel_sync_set_min_eq_max(struct nvgpu_channel_sync *s) s->set_min_eq_max(s); } +void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s) +{ + nvgpu_atomic_inc(&s->refcount); +} + +bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s) +{ + return nvgpu_atomic_dec_and_test(&s->refcount); +} + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + void nvgpu_channel_sync_set_safe_state(struct nvgpu_channel_sync *s) { s->set_safe_state(s); @@ -105,13 +122,4 @@ void nvgpu_channel_sync_destroy(struct nvgpu_channel_sync *sync, sync->destroy(sync); } -void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s) -{ - nvgpu_atomic_inc(&s->refcount); -} - -bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s) -{ - return nvgpu_atomic_dec_and_test(&s->refcount); -} diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h index c92f70bb0..5235a2b65 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h @@ -39,6 +39,7 @@ struct nvgpu_fence_type; struct nvgpu_channel_sync { nvgpu_atomic_t refcount; +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT int (*wait_fence_raw)(struct nvgpu_channel_sync *s, u32 id, u32 thresh, struct priv_cmd_entry *entry); @@ -58,7 +59,7 @@ struct nvgpu_channel_sync { bool wfi, bool need_sync_fence, bool register_irq); - +#endif void (*set_min_eq_max)(struct nvgpu_channel_sync *s); void (*set_safe_state)(struct nvgpu_channel_sync *s); diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c index 68280cd0b..f352ad930 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c @@ -54,6 +54,7 @@ nvgpu_channel_sync_syncpt_from_ops(struct nvgpu_channel_sync *ops) offsetof(struct nvgpu_channel_sync_syncpt, ops)); } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT static int channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c, u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd, u32 wait_cmd_size, u32 pos, bool preallocated) @@ -292,6 +293,14 @@ static int channel_sync_syncpt_incr_user(struct nvgpu_channel_sync *s, entry, fence, need_sync_fence); } +int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s, + u32 id, u32 thresh, struct priv_cmd_entry *entry) +{ + return channel_sync_syncpt_wait_raw(s, id, thresh, entry); +} + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + static void channel_sync_syncpt_set_min_eq_max(struct nvgpu_channel_sync *s) { struct nvgpu_channel_sync_syncpt *sp = @@ -339,18 +348,12 @@ u64 nvgpu_channel_sync_get_syncpt_address(struct nvgpu_channel_sync_syncpt *s) return channel_sync_syncpt_get_address(s); } -int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s, - u32 id, u32 thresh, struct priv_cmd_entry *entry) -{ - return channel_sync_syncpt_wait_raw(s, id, thresh, entry); -} - struct nvgpu_channel_sync_syncpt * nvgpu_channel_sync_to_syncpt(struct nvgpu_channel_sync *sync) { struct nvgpu_channel_sync_syncpt *syncpt = NULL; - if (sync->wait_fence_fd == channel_sync_syncpt_wait_fd) { + if (sync->set_min_eq_max == channel_sync_syncpt_set_min_eq_max) { syncpt = nvgpu_channel_sync_syncpt_from_ops(sync); } @@ -396,9 +399,11 @@ 
nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c, bool user_managed) nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id); nvgpu_atomic_set(&sp->ops.refcount, 0); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT sp->ops.wait_fence_fd = channel_sync_syncpt_wait_fd; sp->ops.incr = channel_sync_syncpt_incr; sp->ops.incr_user = channel_sync_syncpt_incr_user; +#endif sp->ops.set_min_eq_max = channel_sync_syncpt_set_min_eq_max; sp->ops.set_safe_state = channel_sync_syncpt_set_safe_state; sp->ops.destroy = channel_sync_syncpt_destroy; diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c index 846b8ab6e..2e1b13017 100644 --- a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c @@ -72,10 +72,12 @@ int vgpu_fifo_setup_sw(struct gk20a *g) return err; } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT err = nvgpu_channel_worker_init(g); if (err) { goto clean_up; } +#endif f->channel_base = priv->constants.channel_base; @@ -84,9 +86,11 @@ int vgpu_fifo_setup_sw(struct gk20a *g) nvgpu_log_fn(g, "done"); return 0; +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT clean_up: /* FIXME: unmap from bar1 */ nvgpu_fifo_cleanup_sw_common(g); +#endif return err; } diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index 574abf66d..069cab598 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -579,7 +579,9 @@ static const struct gpu_ops vgpu_gp10b_ops = { .unbind_channel_check_hw_state = NULL, .unbind_channel_check_ctx_reload = NULL, .unbind_channel_check_eng_faulted = NULL, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif .force_reset = vgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, .set_timeslice = vgpu_tsg_set_timeslice, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 796647760..64539e5ca 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -666,7 +666,9 @@ static const struct gpu_ops vgpu_gv11b_ops = { .unbind_channel_check_hw_state = NULL, .unbind_channel_check_ctx_reload = NULL, .unbind_channel_check_eng_faulted = NULL, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif .force_reset = vgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, .set_timeslice = vgpu_tsg_set_timeslice, diff --git a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c index e9e0b97b0..0dea390fc 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c +++ b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c @@ -115,10 +115,11 @@ bool gk20a_fifo_handle_ctxsw_timeout(struct gk20a *g) nvgpu_channel_put(ch); } } - +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT if (tsg != NULL) { recover = g->ops.tsg.check_ctxsw_timeout(tsg, &debug_dump, &ms); } +#endif if (recover) { nvgpu_err(g, diff --git a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c index b61e825fa..08ebcea5b 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c +++ b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c @@ -221,8 +221,10 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g) 0, GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR, tsgid); +#ifdef 
CONFIG_NVGPU_KERNEL_MODE_SUBMIT recover = g->ops.tsg.check_ctxsw_timeout(tsg, &debug_dump, &ms); +#endif if (recover) { info_status_str = invalid_str; if (info_status < diff --git a/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.c index ea2ab1eed..783f03de5 100644 --- a/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.c +++ b/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.c @@ -50,6 +50,7 @@ void gk20a_userd_init_mem(struct gk20a *g, struct nvgpu_channel *c) nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0); } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT u32 gk20a_userd_gp_get(struct gk20a *g, struct nvgpu_channel *c) { u64 userd_gpu_va = nvgpu_channel_userd_gpu_va(c); @@ -82,6 +83,7 @@ void gk20a_userd_gp_put(struct gk20a *g, struct nvgpu_channel *c) BUG_ON(u64_hi32(addr) != 0U); nvgpu_bar1_writel(g, (u32)addr, c->gpfifo.put); } +#endif u32 gk20a_userd_entry_size(struct gk20a *g) { diff --git a/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.h b/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.h index 43eddb0c1..45f41b19e 100644 --- a/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.h +++ b/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.h @@ -27,9 +27,11 @@ struct gk20a; struct nvgpu_channel; void gk20a_userd_init_mem(struct gk20a *g, struct nvgpu_channel *c); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT u32 gk20a_userd_gp_get(struct gk20a *g, struct nvgpu_channel *c); u64 gk20a_userd_pb_get(struct gk20a *g, struct nvgpu_channel *c); void gk20a_userd_gp_put(struct gk20a *g, struct nvgpu_channel *c); +#endif u32 gk20a_userd_entry_size(struct gk20a *g); #endif /* USERD_GK20A_H */ diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 803965ad5..c0d88e013 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -730,11 +730,13 @@ static const struct gpu_ops gm20b_ops = { .cleanup_sw = nvgpu_userd_cleanup_sw, #ifdef CONFIG_NVGPU_USERD .init_mem = gk20a_userd_init_mem, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .gp_get = gk20a_userd_gp_get, .gp_put = gk20a_userd_gp_put, .pb_get = gk20a_userd_pb_get, - .entry_size = gk20a_userd_entry_size, #endif + .entry_size = gk20a_userd_entry_size, +#endif /* CONFIG_NVGPU_USERD */ }, .channel = { .alloc_inst = nvgpu_channel_alloc_inst, @@ -764,7 +766,9 @@ static const struct gpu_ops gm20b_ops = { .unbind_channel_check_ctx_reload = nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = NULL, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL .force_reset = nvgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index 35e3f964f..e841d92db 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -815,11 +815,13 @@ static const struct gpu_ops gp10b_ops = { .cleanup_sw = nvgpu_userd_cleanup_sw, #ifdef CONFIG_NVGPU_USERD .init_mem = gk20a_userd_init_mem, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .gp_get = gk20a_userd_gp_get, .gp_put = gk20a_userd_gp_put, .pb_get = gk20a_userd_pb_get, - .entry_size = gk20a_userd_entry_size, #endif + .entry_size = gk20a_userd_entry_size, +#endif /* CONFIG_NVGPU_USERD */ }, .channel = { .alloc_inst = nvgpu_channel_alloc_inst, @@ -831,7 +833,9 @@ static const struct gpu_ops gp10b_ops = { .count = gm20b_channel_count, .read_state = gk20a_channel_read_state, .force_ctx_reload = gm20b_channel_force_ctx_reload, +#ifdef 
CONFIG_NVGPU_KERNEL_MODE_SUBMIT .set_syncpt = nvgpu_channel_set_syncpt, +#endif .abort_clean_up = nvgpu_channel_abort_clean_up, .suspend_all_serviceable_ch = nvgpu_channel_suspend_all_serviceable_ch, @@ -850,7 +854,9 @@ static const struct gpu_ops gp10b_ops = { .unbind_channel_check_ctx_reload = nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = NULL, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL .force_reset = nvgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index 25cafa8c0..73f8d5c83 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -948,11 +948,13 @@ static const struct gpu_ops gv11b_ops = { .cleanup_sw = nvgpu_userd_cleanup_sw, #ifdef CONFIG_NVGPU_USERD .init_mem = gk20a_userd_init_mem, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .gp_get = gv11b_userd_gp_get, .gp_put = gv11b_userd_gp_put, .pb_get = gv11b_userd_pb_get, - .entry_size = gk20a_userd_entry_size, #endif + .entry_size = gk20a_userd_entry_size, +#endif /* CONFIG_NVGPU_USERD */ }, .channel = { .alloc_inst = nvgpu_channel_alloc_inst, @@ -989,7 +991,9 @@ static const struct gpu_ops gv11b_ops = { nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = gv11b_tsg_unbind_channel_check_eng_faulted, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL .force_reset = nvgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 86f3ac95a..d4ca7f6d7 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -991,11 +991,13 @@ static const struct gpu_ops tu104_ops = { .cleanup_sw = nvgpu_userd_cleanup_sw, #ifdef CONFIG_NVGPU_USERD .init_mem = gk20a_userd_init_mem, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .gp_get = gv11b_userd_gp_get, .gp_put = gv11b_userd_gp_put, .pb_get = gv11b_userd_pb_get, - .entry_size = gk20a_userd_entry_size, #endif + .entry_size = gk20a_userd_entry_size, +#endif /* CONFIG_NVGPU_USERD */ }, .channel = { .alloc_inst = nvgpu_channel_alloc_inst, @@ -1032,7 +1034,9 @@ static const struct gpu_ops tu104_ops = { nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = gv11b_tsg_unbind_channel_check_eng_faulted, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL .force_reset = nvgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 7311c6f74..d4e49a622 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -303,13 +303,24 @@ struct nvgpu_channel { struct nvgpu_list_node ch_entry; /* channel's entry in TSG */ +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT struct nvgpu_channel_joblist joblist; + struct gpfifo_desc gpfifo; + struct priv_cmd_queue priv_cmd_q; + struct nvgpu_channel_sync *sync; + /* for job cleanup handling in the background worker */ + struct nvgpu_list_node worker_item; + +#ifdef CONFIG_NVGPU_CHANNEL_WDT + /* kernel watchdog to kill stuck jobs */ + struct nvgpu_channel_wdt wdt; +#endif /* CONFIG_NVGPU_CHANNEL_WDT 
*/ +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + struct nvgpu_allocator fence_allocator; struct vm_gk20a *vm; - struct gpfifo_desc gpfifo; - struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */ struct nvgpu_mem usermode_gpfifo; struct nvgpu_mem inst_block; @@ -319,19 +330,9 @@ struct nvgpu_channel { struct nvgpu_mem *userd_mem; /* kernel mode userd */ u32 userd_offset; /* in bytes from start of userd_mem */ - struct priv_cmd_queue priv_cmd_q; - struct nvgpu_cond notifier_wq; struct nvgpu_cond semaphore_wq; -#ifdef CONFIG_NVGPU_CHANNEL_WDT - /* kernel watchdog to kill stuck jobs */ - struct nvgpu_channel_wdt wdt; -#endif - - /* for job cleanup handling in the background worker */ - struct nvgpu_list_node worker_item; - #if defined(CONFIG_NVGPU_CYCLESTATS) struct { void *cyclestate_buffer; @@ -346,7 +347,6 @@ struct nvgpu_channel { struct nvgpu_list_node dbg_s_list; struct nvgpu_mutex sync_lock; - struct nvgpu_channel_sync *sync; struct nvgpu_channel_sync *user_sync; #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION @@ -388,6 +388,66 @@ struct nvgpu_channel { bool mmu_debug_mode_enabled; #endif }; + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + +static inline struct nvgpu_channel * +nvgpu_channel_from_worker_item(struct nvgpu_list_node *node) +{ + return (struct nvgpu_channel *) + ((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item)); +}; +int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size, + struct priv_cmd_entry *e); +void nvgpu_channel_update_priv_cmd_q_and_free_entry( + struct nvgpu_channel *ch, struct priv_cmd_entry *e); +int nvgpu_channel_worker_init(struct gk20a *g); +void nvgpu_channel_worker_deinit(struct gk20a *g); +struct nvgpu_channel *nvgpu_channel_get_from_file(int fd); +void nvgpu_channel_update(struct nvgpu_channel *c); +int nvgpu_channel_alloc_job(struct nvgpu_channel *c, + struct nvgpu_channel_job **job_out); +void nvgpu_channel_free_job(struct nvgpu_channel *c, + struct nvgpu_channel_job *job); +u32 nvgpu_channel_update_gpfifo_get_and_get_free_count( + struct nvgpu_channel *ch); +u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch); +int nvgpu_channel_add_job(struct nvgpu_channel *c, + struct nvgpu_channel_job *job, + bool skip_buffer_refcounting); +void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c, + struct priv_cmd_entry *e); +void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, + bool clean_all); +int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_fence_type **fence_out, + struct nvgpu_profile *profile); + +int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *gpfifo, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_fence_type **fence_out); +int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch); +void nvgpu_channel_joblist_lock(struct nvgpu_channel *c); +void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c); +bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c); +bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c); + +bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch, + u32 timeout_delta_ms, bool *progress); + +#ifdef CONFIG_NVGPU_CHANNEL_WDT +void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g); +#endif + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + static inline struct nvgpu_channel * nvgpu_channel_from_free_chs(struct nvgpu_list_node 
*node) { @@ -402,13 +462,6 @@ nvgpu_channel_from_ch_entry(struct nvgpu_list_node *node) ((uintptr_t)node - offsetof(struct nvgpu_channel, ch_entry)); }; -static inline struct nvgpu_channel * -nvgpu_channel_from_worker_item(struct nvgpu_list_node *node) -{ - return (struct nvgpu_channel *) - ((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item)); -}; - static inline bool nvgpu_channel_as_bound(struct nvgpu_channel *ch) { return (ch->vm != NULL); @@ -426,19 +479,12 @@ void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_channel *ch); bool nvgpu_channel_mark_error(struct gk20a *g, struct nvgpu_channel *ch); -bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch, - u32 timeout_delta_ms, bool *progress); - void nvgpu_channel_recover(struct gk20a *g, struct nvgpu_channel *ch, bool verbose, u32 rc_type); void nvgpu_channel_abort(struct nvgpu_channel *ch, bool channel_preempt); void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch); void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events); -int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size, - struct priv_cmd_entry *e); -void nvgpu_channel_update_priv_cmd_q_and_free_entry( - struct nvgpu_channel *ch, struct priv_cmd_entry *e); int nvgpu_channel_enable_tsg(struct gk20a *g, struct nvgpu_channel *ch); int nvgpu_channel_disable_tsg(struct gk20a *g, struct nvgpu_channel *ch); @@ -449,12 +495,6 @@ void nvgpu_channel_resume_all_serviceable_ch(struct gk20a *g); void nvgpu_channel_deterministic_idle(struct gk20a *g); void nvgpu_channel_deterministic_unidle(struct gk20a *g); -int nvgpu_channel_worker_init(struct gk20a *g); -void nvgpu_channel_worker_deinit(struct gk20a *g); - -struct nvgpu_channel *nvgpu_channel_get_from_file(int fd); -void nvgpu_channel_update(struct nvgpu_channel *c); - /* returns ch if reference was obtained */ struct nvgpu_channel *__must_check nvgpu_channel_get__func( struct nvgpu_channel *ch, const char *caller); @@ -479,51 +519,15 @@ struct nvgpu_channel *gk20a_open_new_channel(struct gk20a *g, int nvgpu_channel_setup_bind(struct nvgpu_channel *c, struct nvgpu_setup_bind_args *args); -void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g); - -bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c); -void nvgpu_channel_joblist_lock(struct nvgpu_channel *c); -void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c); -bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c); - int nvgpu_channel_update_runlist(struct nvgpu_channel *c, bool add); void nvgpu_channel_wait_until_counter_is_N( struct nvgpu_channel *ch, nvgpu_atomic_t *counter, int wait_value, struct nvgpu_cond *c, const char *caller, const char *counter_name); -int nvgpu_channel_alloc_job(struct nvgpu_channel *c, - struct nvgpu_channel_job **job_out); -void nvgpu_channel_free_job(struct nvgpu_channel *c, - struct nvgpu_channel_job *job); -u32 nvgpu_channel_update_gpfifo_get_and_get_free_count( - struct nvgpu_channel *ch); -u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch); -int nvgpu_channel_add_job(struct nvgpu_channel *c, - struct nvgpu_channel_job *job, - bool skip_buffer_refcounting); -void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c, - struct priv_cmd_entry *e); -void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, - bool clean_all); void nvgpu_channel_free_usermode_buffers(struct nvgpu_channel *c); u32 nvgpu_get_gpfifo_entry_size(void); -int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, - struct nvgpu_gpfifo_userdata 
userdata,
-				u32 num_entries,
-				u32 flags,
-				struct nvgpu_channel_fence *fence,
-				struct nvgpu_fence_type **fence_out,
-				struct nvgpu_profile *profile);
-
-int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
-				struct nvgpu_gpfifo_entry *gpfifo,
-				u32 num_entries,
-				u32 flags,
-				struct nvgpu_channel_fence *fence,
-				struct nvgpu_fence_type **fence_out);
-
 #ifdef CONFIG_DEBUG_FS
 void trace_write_pushbuffers(struct nvgpu_channel *c, u32 count);
 #else
@@ -550,7 +554,6 @@ int nvgpu_channel_alloc_inst(struct gk20a *g, struct nvgpu_channel *ch);
 void nvgpu_channel_free_inst(struct gk20a *g, struct nvgpu_channel *ch);
 void nvgpu_channel_set_error_notifier(struct gk20a *g, struct nvgpu_channel *ch,
 		u32 error_notifier);
-int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch);
 struct nvgpu_channel *nvgpu_channel_refch_from_inst_ptr(struct gk20a *g,
 		u64 inst_ptr);
 void nvgpu_channel_debug_dump_all(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
index c76342763..68e676057 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
@@ -29,6 +29,9 @@
 #include
 
 struct nvgpu_channel_sync;
+
+#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
+
 struct priv_cmd_entry;
 struct nvgpu_channel;
 struct nvgpu_fence_type;
@@ -73,6 +76,17 @@ int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s,
  * for semaphores.
  */
 void nvgpu_channel_sync_set_min_eq_max(struct nvgpu_channel_sync *s);
+/*
+ * Increment the usage_counter for this instance.
+ */
+void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s);
+
+/*
+ * Decrement the usage_counter and return true when it reaches zero.
+ */
+bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s);
+#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
+
 /*
  * Set the channel syncpoint/semaphore to safe state
  * This should be used to reset User managed syncpoint since we don't
@@ -86,16 +100,6 @@ void nvgpu_channel_sync_set_safe_state(struct nvgpu_channel_sync *s);
 void nvgpu_channel_sync_destroy(struct nvgpu_channel_sync *sync,
 	bool set_safe_state);
 
-/*
- * Increment the usage_counter for this instance.
- */
-void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s);
-
-/*
- * Decrement the usage_counter for this instance and return if equals 0.
- */
-bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s);
-
 /*
  * Construct a channel_sync backed by either a syncpoint or a semaphore.
* A channel_sync is by default constructed as backed by a syncpoint diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h index c2c7249ac..e19c752f4 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h @@ -29,6 +29,8 @@ #include #include +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + struct nvgpu_channel; struct nvgpu_channel_sync_semaphore; /* @@ -46,4 +48,6 @@ struct nvgpu_channel_sync * nvgpu_channel_sync_semaphore_create( struct nvgpu_channel *c, bool user_managed); +#endif + #endif /* NVGPU_CHANNEL_SYNC_SEMAPHORE_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/fence.h b/drivers/gpu/nvgpu/include/nvgpu/fence.h index 04061fd99..48e674d3b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/fence.h +++ b/drivers/gpu/nvgpu/include/nvgpu/fence.h @@ -80,7 +80,9 @@ int nvgpu_fence_pool_alloc(struct nvgpu_channel *ch, unsigned int count); void nvgpu_fence_pool_free(struct nvgpu_channel *ch); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch); +#endif void nvgpu_fence_init(struct nvgpu_fence_type *f, const struct nvgpu_fence_ops *ops, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 7f78d62f4..e31dc476c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1118,9 +1118,11 @@ struct gpu_ops { int (*setup_sw)(struct gk20a *g); void (*cleanup_sw)(struct gk20a *g); void (*init_mem)(struct gk20a *g, struct nvgpu_channel *c); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT u32 (*gp_get)(struct gk20a *g, struct nvgpu_channel *c); void (*gp_put)(struct gk20a *g, struct nvgpu_channel *c); u64 (*pb_get)(struct gk20a *g, struct nvgpu_channel *c); +#endif u32 (*entry_size)(struct gk20a *g); } userd; @@ -1229,7 +1231,9 @@ struct gpu_ops { void (*set_error_notifier)(struct nvgpu_channel *ch, u32 error); void (*reset_faulted)(struct gk20a *g, struct nvgpu_channel *ch, bool eng, bool pbdma); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT int (*set_syncpt)(struct nvgpu_channel *ch); +#endif void (*debug_dump)(struct gk20a *g, struct nvgpu_debug_context *o, struct nvgpu_channel_dump_info *info); @@ -1257,8 +1261,10 @@ struct gpu_ops { void (*unbind_channel_check_eng_faulted)(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch, struct nvgpu_channel_hw_state *state); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT bool (*check_ctxsw_timeout)(struct nvgpu_tsg *tsg, bool *verbose, u32 *ms); +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL int (*force_reset)(struct nvgpu_channel *ch, u32 err_code, bool verbose); diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h index 71438eae0..b89bfec65 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h @@ -127,8 +127,10 @@ void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_tsg *tsg); bool nvgpu_tsg_mark_error(struct gk20a *g, struct nvgpu_tsg *tsg); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg, bool *debug_dump, u32 *ms); +#endif int nvgpu_tsg_set_runlist_interleave(struct nvgpu_tsg *tsg, u32 level); #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING int nvgpu_tsg_set_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us);