diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index 3f9832572..a6835d633 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -65,7 +65,8 @@ struct nvgpu_tsg *nvgpu_tsg_get_from_id(struct gk20a *g, u32 tsgid)
 }
 
-static bool gk20a_is_channel_active(struct gk20a *g, struct nvgpu_channel *ch)
+static bool nvgpu_tsg_is_channel_active(struct gk20a *g,
+		struct nvgpu_channel *ch)
 {
 	struct nvgpu_fifo *f = &g->fifo;
 	struct nvgpu_runlist_info *runlist;
@@ -99,7 +100,7 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
 	}
 
 	/* channel cannot be bound to TSG if it is already active */
-	if (gk20a_is_channel_active(tsg->g, ch)) {
+	if (nvgpu_tsg_is_channel_active(tsg->g, ch)) {
 		return -EINVAL;
 	}
 
@@ -135,42 +136,7 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
 	return err;
 }
 
-/* The caller must ensure that channel belongs to a tsg */
-int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
-{
-	struct gk20a *g = ch->g;
-	int err;
-
-	nvgpu_log_fn(g, "unbind tsg:%u ch:%u\n", tsg->tsgid, ch->chid);
-
-	err = nvgpu_tsg_unbind_channel_common(tsg, ch);
-	if (err != 0) {
-		nvgpu_err(g, "Channel %d unbind failed, tearing down TSG %d",
-			ch->chid, tsg->tsgid);
-
-		nvgpu_tsg_abort(g, tsg, true);
-		/* If channel unbind fails, channel is still part of runlist */
-		if (nvgpu_channel_update_runlist(ch, false) != 0) {
-			nvgpu_err(g,
-				"remove ch %u from runlist failed", ch->chid);
-		}
-
-		nvgpu_rwsem_down_write(&tsg->ch_list_lock);
-		nvgpu_list_del(&ch->ch_entry);
-		ch->tsgid = NVGPU_INVALID_TSG_ID;
-		nvgpu_rwsem_up_write(&tsg->ch_list_lock);
-	}
-
-	if (g->ops.tsg.unbind_channel != NULL) {
-		err = g->ops.tsg.unbind_channel(tsg, ch);
-	}
-
-	nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
-
-	return 0;
-}
-
-int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg,
+static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg,
 		struct nvgpu_channel *ch)
 {
 	struct gk20a *g = ch->g;
@@ -251,6 +217,41 @@ fail_enable_tsg:
 	return err;
 }
 
+/* The caller must ensure that channel belongs to a tsg */
+int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
+{
+	struct gk20a *g = ch->g;
+	int err;
+
+	nvgpu_log_fn(g, "unbind tsg:%u ch:%u\n", tsg->tsgid, ch->chid);
+
+	err = nvgpu_tsg_unbind_channel_common(tsg, ch);
+	if (err != 0) {
+		nvgpu_err(g, "Channel %d unbind failed, tearing down TSG %d",
+			ch->chid, tsg->tsgid);
+
+		nvgpu_tsg_abort(g, tsg, true);
+		/* If channel unbind fails, channel is still part of runlist */
+		if (nvgpu_channel_update_runlist(ch, false) != 0) {
+			nvgpu_err(g,
+				"remove ch %u from runlist failed", ch->chid);
+		}
+
+		nvgpu_rwsem_down_write(&tsg->ch_list_lock);
+		nvgpu_list_del(&ch->ch_entry);
+		ch->tsgid = NVGPU_INVALID_TSG_ID;
+		nvgpu_rwsem_up_write(&tsg->ch_list_lock);
+	}
+
+	if (g->ops.tsg.unbind_channel != NULL) {
+		err = g->ops.tsg.unbind_channel(tsg, ch);
+	}
+
+	nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
+
+	return 0;
+}
+
 int nvgpu_tsg_unbind_channel_check_hw_state(struct nvgpu_tsg *tsg,
 		struct nvgpu_channel *ch)
 {
@@ -405,6 +406,7 @@ bool nvgpu_tsg_mark_error(struct gk20a *g,
 }
 
+#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 void nvgpu_tsg_set_ctxsw_timeout_accumulated_ms(struct nvgpu_tsg *tsg, u32 ms)
 {
 	struct nvgpu_channel *ch = NULL;
@@ -437,6 +439,7 @@ bool nvgpu_tsg_ctxsw_timeout_debug_dump_state(struct nvgpu_tsg *tsg)
 
 	return verbose;
 }
+#endif
 
 void nvgpu_tsg_set_error_notifier(struct gk20a *g, struct nvgpu_tsg *tsg,
 		u32 error_notifier)
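The moved unbind path keeps its original contract: on an internal failure it tears the TSG down but still returns 0 to the caller. A minimal caller-side sketch of the bind/unbind pairing this hunk reorders; `bind_one_channel()` is hypothetical, and only the two `nvgpu_tsg_*` calls and their return conventions come from the patch itself:

```c
/* Hypothetical caller; assumes <nvgpu/tsg.h> and <nvgpu/channel.h>. */
static int bind_one_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
{
	/* Returns -EINVAL if ch is already bound or already active. */
	int err = nvgpu_tsg_bind_channel(tsg, ch);

	if (err != 0) {
		return err;
	}

	/* ... submit work on ch ... */

	/*
	 * On an internal unbind failure the TSG is aborted and the channel
	 * removed from the runlist, but 0 is still returned (see above).
	 */
	return nvgpu_tsg_unbind_channel(tsg, ch);
}
```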
@@ -608,14 +611,15 @@ void nvgpu_tsg_disable_sched(struct gk20a *g, struct nvgpu_tsg *tsg)
 			RUNLIST_DISABLED);
 }
 
-static void release_used_tsg(struct nvgpu_fifo *f, struct nvgpu_tsg *tsg)
+static void nvgpu_tsg_release_used_tsg(struct nvgpu_fifo *f,
+		struct nvgpu_tsg *tsg)
 {
 	nvgpu_mutex_acquire(&f->tsg_inuse_mutex);
 	f->tsg[tsg->tsgid].in_use = false;
 	nvgpu_mutex_release(&f->tsg_inuse_mutex);
 }
 
-static struct nvgpu_tsg *gk20a_tsg_acquire_unused_tsg(struct nvgpu_fifo *f)
+static struct nvgpu_tsg *nvgpu_tsg_acquire_unused_tsg(struct nvgpu_fifo *f)
 {
 	struct nvgpu_tsg *tsg = NULL;
 	unsigned int tsgid;
@@ -696,14 +700,14 @@ struct nvgpu_tsg *nvgpu_tsg_open(struct gk20a *g, pid_t pid)
 	struct nvgpu_tsg *tsg;
 	int err;
 
-	tsg = gk20a_tsg_acquire_unused_tsg(&g->fifo);
+	tsg = nvgpu_tsg_acquire_unused_tsg(&g->fifo);
 	if (tsg == NULL) {
 		return NULL;
 	}
 
 	err = nvgpu_tsg_open_common(g, tsg, pid);
 	if (err != 0) {
-		release_used_tsg(&g->fifo, tsg);
+		nvgpu_tsg_release_used_tsg(&g->fifo, tsg);
 		nvgpu_err(g, "tsg %d open failed %d", tsg->tsgid, err);
 		return NULL;
 	}
@@ -765,7 +769,7 @@ void nvgpu_tsg_release(struct nvgpu_ref *ref)
 	nvgpu_mutex_release(&tsg->event_id_list_lock);
 
 	nvgpu_tsg_release_common(g, tsg);
-	release_used_tsg(&g->fifo, tsg);
+	nvgpu_tsg_release_used_tsg(&g->fifo, tsg);
 
 	nvgpu_log(g, gpu_dbg_fn, "tsg released %d", tsg->tsgid);
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index d8e56a4bd..627a26fd5 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -679,11 +679,9 @@ struct gpu_ops {
 				u64 zcull_va, u32 mode);
 #endif
-#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
 		int (*set_preemption_mode)(struct nvgpu_channel *ch,
 				u32 graphics_preempt_mode,
 				u32 compute_preempt_mode);
-#endif
 	} setup;
 #ifdef CONFIG_NVGPU_GRAPHICS
 	struct {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h b/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
index 2ba25d767..775df9a0a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
@@ -40,10 +40,8 @@ void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 		struct nvgpu_gr_ctx *gr_ctx);
 void nvgpu_gr_setup_free_subctx(struct nvgpu_channel *c);
 
-#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
 int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
 		u32 graphics_preempt_mode,
 		u32 compute_preempt_mode);
-#endif
 
 #endif /* NVGPU_GR_SETUP_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
index 878c65310..6cba6f20a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
@@ -21,6 +21,11 @@
  */
 #ifndef NVGPU_TSG_H
 #define NVGPU_TSG_H
+/**
+ * @file
+ *
+ * Abstract interface for TSG related functionality.
+ */
 
 #include
 #include
@@ -28,6 +33,9 @@
 #include
 #include
 
+/**
+ * Software defined invalid TSG id value.
+ */
 #define NVGPU_INVALID_TSG_ID (U32_MAX)
 
 #define NVGPU_TSG_TIMESLICE_LOW_PRIORITY_US 1300U
@@ -35,6 +43,9 @@
 #define NVGPU_TSG_TIMESLICE_HIGH_PRIORITY_US 5200U
 #define NVGPU_TSG_TIMESLICE_MIN_US 1000U
 #define NVGPU_TSG_TIMESLICE_MAX_US 50000U
+/**
+ * Default TSG timeslice value in microseconds. Currently it is 1024 us.
+ */
 #define NVGPU_TSG_TIMESLICE_DEFAULT_US (128U << 3U)
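The arithmetic in the new comment checks out: `128U << 3U` is 128 * 8 = 1024. A build-time check of that relationship might look like the following sketch; the assertion is illustrative and not part of the patch:

```c
#include <nvgpu/tsg.h>

/* Illustrative only: ties the macro to the value documented above. */
_Static_assert(NVGPU_TSG_TIMESLICE_DEFAULT_US == 1024U,
		"default TSG timeslice is documented as 1024 us");
```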
 
 struct gk20a;
@@ -42,8 +53,11 @@ struct nvgpu_channel;
 struct nvgpu_gr_ctx;
 struct nvgpu_channel_hw_state;
 
+#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
 enum nvgpu_event_id_type;
+#endif
 
+/** Format for reporting SM errors read from h/w registers */
 struct nvgpu_tsg_sm_error_state {
 	u32 hww_global_esr;
 	u32 hww_warp_esr;
@@ -52,40 +66,125 @@ struct nvgpu_tsg_sm_error_state {
 	u32 hww_warp_esr_report_mask;
 };
 
+/**
+ * Fields corresponding to TSG's s/w context.
+ */
 struct nvgpu_tsg {
+	/** Pointer to GPU driver struct. */
 	struct gk20a *g;
 
+	/** Points to the TSG's virtual memory. */
 	struct vm_gk20a *vm;
 
+	/**
+	 * Starting with Volta, when a Channel/TSG is set up, a recovery buffer
+	 * region must be allocated in BAR2, to allow an engine to save methods
+	 * if it faults. The virtual memory address for this buffer is set by
+	 * s/w in the channel instance block.
+	 * S/w allocates memory for the #nvgpu_mem type struct for
+	 * #nvgpu_fifo.num_pbdma. This is then used to alloc and map memory
+	 * from the BAR2 VM. The size of the actual method buffer is chip
+	 * specific and is calculated by s/w during TSG init.
+	 */
 	struct nvgpu_mem *eng_method_buffers;
 
+	/** Allocated during TSG open and freed during TSG release. */
 	struct nvgpu_gr_ctx *gr_ctx;
 
+	/**
+	 * This ref is initialized during TSG setup s/w.
+	 * It is ref_get whenever a channel is bound to the TSG and
+	 * ref_put whenever a channel is unbound from the TSG.
+	 */
 	struct nvgpu_ref refcount;
 
+	/** List of channels bound to this TSG. */
 	struct nvgpu_list_node ch_list;
+	/**
+	 * Ioctls using this field are not supported in the safety build.
+	 * Refer to the CONFIG_NVGPU_CHANNEL_TSG_CONTROL config.
+	 */
 	struct nvgpu_list_node event_id_list;
+	/**
+	 * Read-write semaphore used for accessing/modifying #ch_list.
+	 */
 	struct nvgpu_rwsem ch_list_lock;
+	/**
+	 * Mutex used to access/modify #event_id_list.
+	 * Ioctls using this field are not supported in the safety build.
+	 * Refer to the CONFIG_NVGPU_CHANNEL_TSG_CONTROL config.
+	 */
 	struct nvgpu_mutex event_id_list_lock;
 
+	/**
+	 * Total number of channels that are bound to this TSG. This can
+	 * change at run time whenever channels are bound to or unbound
+	 * from the TSG.
+	 */
 	u32 num_active_channels;
 
+	/**
+	 * Timeslice period for the TSG, in microseconds.
+	 * All channels in a TSG share the same runlist timeslice,
+	 * which specifies how long a single context runs on an engine
+	 * or PBDMA before being swapped for a different context.
+	 * The timeslice period is set in the TSG header of the runlist entry
+	 * defined by h/w.
+	 * The timeslice period should normally not be set to zero. A timeslice
+	 * of zero will be treated as a timeslice period of 1 ns (Bug 1652173).
+	 * The runlist timeslice period begins after the context has been
+	 * loaded on a PBDMA but is paused while the channel has an outstanding
+	 * context load to an engine. Time spent switching a context into an
+	 * engine is not part of the runlist timeslice.
+	 */
 	unsigned int timeslice_us;
-	unsigned int timeslice_timeout;
-	unsigned int timeslice_scale;
+	/**
+	 * See include/nvgpu/runlist.h and
+	 * refer to #NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW.
+	 */
 	u32 interleave_level;
+	/** Ranges from 0 to #nvgpu_fifo.num_channels - 1. */
 	u32 tsgid;
+	/**
+	 * There is a maximum number of runlists defined by the h/w. Usually
+	 * it is one runlist per engine (graphics and grcopy share a runlist).
+	 * The runlist_id specifies the h/w runlist to which a runlist in
+	 * memory is being submitted. Each runlist serves a specific set of
+	 * engines. Refer to top.h.
+	 */
 	u32 runlist_id;
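For reference, the `ch_list`/`ch_list_lock` pairing documented above follows the usual reader/writer pattern: bind/unbind (seen in tsg.c earlier) take the semaphore for write, while walkers take it for read. A sketch, assuming the standard `<nvgpu/rwsem.h>` and `<nvgpu/list.h>` helpers; `count_bound_channels()` itself is hypothetical:

```c
/* Illustrative reader of tsg->ch_list under the documented lock. */
static u32 count_bound_channels(struct nvgpu_tsg *tsg)
{
	struct nvgpu_channel *ch;
	u32 count = 0U;

	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
	nvgpu_list_for_each_entry(ch, &tsg->ch_list,
			nvgpu_channel, ch_entry) {
		count++;
	}
	nvgpu_rwsem_up_read(&tsg->ch_list_lock);

	return count;
}
```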
 
+	/** tgid (OS specific) of the process that opened the TSG. */
 	pid_t tgid;
+	/**
+	 * Number of active TPCs as requested by userspace.
+	 * This is used while requesting dynamic TPC PG (power gating).
+	 * TPC PG is chip specific.
+	 */
 	u32 num_active_tpcs;
+	/** Set to non-zero if dynamic TPC PG is requested to be enabled. */
 	u8 tpc_pg_enabled;
+	/**
+	 * Set to true if dynamic TPC PG is enabled and #num_active_tpcs is
+	 * non-zero.
+	 */
 	bool tpc_num_initialized;
+	/**
+	 * Set to true if the tsgid is acquired, else set to false.
+	 * This is protected by #nvgpu_fifo.tsg_inuse_mutex, which must be
+	 * acquired/released to check whether the tsgid is already acquired.
+	 */
 	bool in_use;
 
+	/** A non-abortable TSG is used for vidmem clears. */
 	bool abortable;
 
-	/* MMU debug mode enabled if mmu_debug_mode_refcnt > 0 */
+	/** MMU debug mode enabled if mmu_debug_mode_refcnt > 0 */
 	u32 mmu_debug_mode_refcnt;
 
+	/**
+	 * Pointer to store SM errors read from h/w registers.
+	 * Check #nvgpu_tsg_sm_error_state.
+	 */
 	struct nvgpu_tsg_sm_error_state *sm_error_states;
 
 #ifdef CONFIG_NVGPU_DEBUGGER
@@ -97,43 +196,290 @@ struct nvgpu_tsg {
 };
 
 int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid);
+
+/**
+ * @brief Open and initialize an unused TSG.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param pid[in]	The PID of the process.
+ *
+ * - Acquire an unused TSG.
+ * - Set up the s/w context of the acquired TSG.
+ *
+ * @return Pointer to TSG struct. See #nvgpu_tsg.
+ * @retval NULL if there is no unused TSG.
+ * @retval NULL if setting the s/w context for the opened TSG failed. If
+ *         setting the s/w context failed, the acquired TSG is released back
+ *         to the pool of unused TSGs.
+ */
 struct nvgpu_tsg *nvgpu_tsg_open(struct gk20a *g, pid_t pid);
+
+/**
+ * @brief Clean up resources used by the TSG. This is needed for releasing
+ *        the TSG.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param tsg[in]	Pointer to TSG struct.
+ *
+ * - Call the non-NULL HAL to release the TSG. This HAL is non-NULL for vgpu
+ *   only.
+ * - Call nvgpu_free_gr_ctx_struct to free #nvgpu_tsg.gr_ctx.
+ * - Set #nvgpu_tsg.gr_ctx to NULL.
+ * - If #nvgpu_tsg.vm is non-NULL, do #nvgpu_vm_put for this vm and set
+ *   it to NULL (unhook the TSG from the VM).
+ * - If #nvgpu_tsg.sm_error_states is non-NULL, free the allocated memory and
+ *   set it to NULL.
+ */
 void nvgpu_tsg_release_common(struct gk20a *g, struct nvgpu_tsg *tsg);
+
+/**
+ * @brief Release a TSG to the pool of free TSGs.
+ *
+ * @param ref[in]	Pointer to #nvgpu_tsg.refcount.
+ *
+ * - Get the pointer to the #nvgpu_tsg using #ref.
+ * - Call the HAL to free #nvgpu_tsg.gr_ctx if this memory pointer is non-NULL
+ *   and valid and #nvgpu_tsg.vm is also non-NULL.
+ * - Unhook all events created on the TSG being released.
+ * -- Acquire #nvgpu_tsg.event_id_list_lock.
+ * -- While #nvgpu_tsg.event_id_list is non-empty,
+ * --- Delete #nvgpu_tsg.event_id_list.next.
+ * -- Release #nvgpu_tsg.event_id_list_lock.
+ * - Call #nvgpu_tsg_release_common.
+ * - Set #nvgpu_tsg.in_use to false so that the TSG is made available
+ *   to the pool of unused TSGs.
+ */
 void nvgpu_tsg_release(struct nvgpu_ref *ref);
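Taken together, the three declarations above imply the following lifetime pattern for a TSG that never gets a channel bound. A sketch only; `close_unused_tsg()` is hypothetical, while `nvgpu_tsg_open()` and the `nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release)` idiom appear verbatim in this patch:

```c
/* Hypothetical open/teardown of a freshly opened, never-bound TSG. */
static void close_unused_tsg(struct gk20a *g, pid_t pid)
{
	struct nvgpu_tsg *tsg = nvgpu_tsg_open(g, pid);

	if (tsg == NULL) {
		return; /* no free TSG, or s/w context setup failed */
	}

	/*
	 * Dropping the last reference runs nvgpu_tsg_release(), which
	 * calls nvgpu_tsg_release_common() and returns the tsgid to
	 * the unused pool.
	 */
	nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
}
```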
 
+/**
+ * @brief Initialize the s/w context for TSGs.
+ *
+ * @param g[in]	The GPU driver struct.
+ *
+ * Initialize the s/w context for TSGs:
+ * - Allocate a zero initialized kernel memory area for
+ *   #nvgpu_fifo.num_channels number of #nvgpu_fifo.tsg structs. This area of
+ *   memory is indexed by tsgid, ranging from 0 to
+ *   #nvgpu_fifo.num_channels - 1.
+ * - Upon successful allocation of memory, initialize the memory area assigned
+ *   to each TSG with s/w defaults.
+ *
+ * @return 0 for successful init.
+ * @retval -ENOMEM if kernel memory could not be allocated to support TSG
+ *         s/w context.
+ */
 int nvgpu_tsg_setup_sw(struct gk20a *g);
 
+/**
+ * @brief De-initialize the s/w context for TSGs.
+ *
+ * @param g[in]	The GPU driver struct.
+ *
+ * De-initialize the s/w context for TSGs:
+ * - Destroy the s/w context for each tsgid from 0 up to
+ *   #nvgpu_fifo.num_channels - 1.
+ * - De-allocate the kernel memory area allocated to support the s/w context
+ *   of #nvgpu_fifo.num_channels number of TSGs.
+ */
 void nvgpu_tsg_cleanup_sw(struct gk20a *g);
 
+/**
+ * @brief Get a pointer to the #nvgpu_tsg for the tsgid of the given channel.
+ *
+ * @param ch[in]	Pointer to Channel struct.
+ *
+ * Validate the tsgid of the given channel. If the tsgid is not equal to
+ * #NVGPU_INVALID_TSG_ID, get a pointer to the area of memory reserved for the
+ * s/w context of the TSG and indexed by tsgid.
+ *
+ * @return Pointer to #nvgpu_tsg struct.
+ * @retval NULL if the tsgid of the given channel is #NVGPU_INVALID_TSG_ID.
+ * @note This does not check if tsgid is < num_channels.
+ */
 struct nvgpu_tsg *nvgpu_tsg_from_ch(struct nvgpu_channel *ch);
 
+/**
+ * @brief Disable all the channels bound to a TSG.
+ *
+ * @param tsg[in]	Pointer to TSG struct.
+ *
+ * Disable all the channels bound to the TSG so that the h/w scheduler does
+ * not schedule these channels.
+ */
 void nvgpu_tsg_disable(struct nvgpu_tsg *tsg);
 
+/**
+ * @brief Bind a channel to the TSG.
+ *
+ * @param tsg[in]	Pointer to TSG struct.
+ * @param ch[in]	Pointer to Channel struct.
+ *
+ * - Make sure the channel is not already bound to a TSG.
+ * - Make sure the channel is not part of any runlist.
+ * - Set the runlist id of the TSG to the channel's runlist_id if the
+ *   runlist_id of the TSG is set to #NVGPU_INVALID_TSG_ID.
+ * - Call the HAL to bind the channel to the TSG.
+ * - Add the channel to the TSG's list of channels. See #nvgpu_tsg.ch_list.
+ * - Set #nvgpu_channel.tsgid to #nvgpu_tsg.tsgid.
+ * - Set #nvgpu_channel.unserviceable to false to mark that the channel is
+ *   serviceable.
+ * - Bind Engine Method Buffers (this may not be required for all chips).
+ * - Get #nvgpu_tsg.refcount to prevent the TSG from being freed while
+ *   channels are bound to it.
+ *
+ * @return 0 for successful bind.
+ * @retval -EINVAL if the channel is already bound to a TSG.
+ * @retval -EINVAL if the channel is already active. This is determined by
+ *         checking if the bit corresponding to chid is set in the
+ *         #nvgpu_runlist_info.active_channels of any of the supported
+ *         #nvgpu_fifo.num_runlists.
+ * @retval -EINVAL if the runlist_id of the channel and the TSG do not match.
+ */
 int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch);
 
+/**
+ * @brief Get a pointer to the #nvgpu_tsg for a tsgid.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param tsgid[in]	Id of the TSG.
+ *
+ * Get a pointer to the area of memory reserved for the s/w context of the
+ * TSG and indexed by tsgid.
+ *
+ * @return Pointer to #nvgpu_tsg struct.
+ */
 struct nvgpu_tsg *nvgpu_tsg_get_from_id(struct gk20a *g, u32 tsgid);
 
+/**
+ * @brief Validate a tsgid and get a pointer to the #nvgpu_tsg for it.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param tsgid[in]	Id of the TSG.
+ *
+ * If the tsgid is not equal to #NVGPU_INVALID_TSG_ID, get a pointer to the
+ * area of memory reserved for the s/w context of the TSG and indexed by
+ * tsgid.
+ *
+ * @return Pointer to #nvgpu_tsg struct.
+ * @retval NULL if tsgid is #NVGPU_INVALID_TSG_ID.
+ */
 struct nvgpu_tsg *nvgpu_tsg_check_and_get_from_id(struct gk20a *g, u32 tsgid);
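The three lookup helpers above differ only in how much validation they perform. A side-by-side sketch; `lookup_demo()` is hypothetical, and the behavior in the comments restates the @retval/@note documentation above:

```c
/* Illustrative comparison of the lookup helpers declared above. */
static void lookup_demo(struct gk20a *g, struct nvgpu_channel *ch, u32 tsgid)
{
	/* Trusts tsgid completely; the caller must know it is valid. */
	struct nvgpu_tsg *a = nvgpu_tsg_get_from_id(g, tsgid);

	/* Screens NVGPU_INVALID_TSG_ID, but not tsgid >= num_channels. */
	struct nvgpu_tsg *b = nvgpu_tsg_check_and_get_from_id(g, tsgid);

	/* Same screening, with the id taken from the channel itself. */
	struct nvgpu_tsg *c = nvgpu_tsg_from_ch(ch);

	(void)a; (void)b; (void)c;
}
```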
 
+/**
+ * @brief Unbind a channel from the TSG it is bound to.
+ *
+ * @param tsg[in]	Pointer to TSG struct.
+ * @param ch[in]	Pointer to Channel struct.
+ *
+ * Unbind the channel from the TSG:
+ * - Check if the channel being unbound has become unserviceable.
+ * - Disable the TSG.
+ * - Preempt the TSG.
+ * - Check the h/w state of the channel.
+ * - Remove the channel from its runlist.
+ * - Remove the channel from the TSG's channel list.
+ * - Set the tsgid of the channel to #NVGPU_INVALID_TSG_ID.
+ * - Disable the channel so that it is not picked up by the h/w scheduler.
+ * - Enable the TSG if it is still serviceable. The TSG becomes unserviceable
+ *   if the channel being unbound has become unserviceable.
+ * - Do the clean up needed for aborting the channel.
+ * If an error occurred during the previous steps:
+ * - Call #nvgpu_tsg_abort to abort the TSG.
+ * - Call #nvgpu_channel_update_runlist to remove the channel from the
+ *   runlist.
+ * - Acquire #nvgpu_tsg.ch_list_lock of the TSG and delete the channel from
+ *   #nvgpu_tsg.ch_list.
+ * - Set #nvgpu_channel.tsgid to #NVGPU_INVALID_TSG_ID.
+ * - Release #nvgpu_tsg.ch_list_lock of the TSG.
+ * Call the non-NULL HAL to unbind the channel from the TSG. This HAL is vgpu
+ * specific and does not apply to non-vgpu.
+ * Release #nvgpu_tsg.refcount and call #nvgpu_tsg_release if the refcount
+ * becomes 0.
+ *
+ * @return 0
+ * @note The caller of this function must make sure that the channel
+ *       requested to be unbound is actually bound to the TSG.
+ */
 int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch);
-int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg,
-		struct nvgpu_channel *ch);
 
+/**
+ * @brief Check the h/w channel status before unbinding the channel.
+ *
+ * @param tsg[in]	Pointer to TSG struct.
+ * @param ch[in]	Pointer to Channel struct.
+ *
+ * - Call the HAL to read the chip specific h/w channel status register into
+ *   the hw_state local variable.
+ * - If the next bit is not set in hw_state,
+ * -- Call the HAL (if supported for the chip) to check the ctx_reload bit in
+ *    hw_state.
+ * --- If set, move the ctx_reload h/w state to some other channel's h/w
+ *     status register. The new channel id should be different from the
+ *     channel requested to be unbound.
+ * -- Call the HAL (if supported for the chip) to check the eng_faulted bit
+ *    in hw_state.
+ * --- If set, clear the CE method buffer in the #ASYNC_CE_RUNQUE index of
+ *     #nvgpu_tsg.eng_method_buffers of the TSG that the channel being
+ *     unbound is bound to.
+ *
+ * @return 0 in case of success and < 0 in case of failure.
+ * @retval -EINVAL if the next bit is set in hw_state.
+ */
 int nvgpu_tsg_unbind_channel_check_hw_state(struct nvgpu_tsg *tsg,
 		struct nvgpu_channel *ch);
 
+/**
+ * @brief Find another channel in the TSG and force a ctx reload if the
+ *        h/w channel status of the channel is set to ctx_reload.
+ *
+ * @param tsg[in]	Pointer to TSG struct.
+ * @param ch[in]	Pointer to Channel struct.
+ * @param hw_state[in]	Pointer to nvgpu_channel_hw_state struct.
+ *
+ * Find another channel in the TSG and force a ctx reload on it.
+ *
+ * @note If there is only one channel in this TSG then the function will not
+ *       find another channel to force a ctx reload.
+ */
 void nvgpu_tsg_unbind_channel_check_ctx_reload(struct nvgpu_tsg *tsg,
 		struct nvgpu_channel *ch,
 		struct nvgpu_channel_hw_state *hw_state);
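A condensed sketch of the hw-state gate those two helpers implement inside the (now static) unbind path. The `next` and `ctx_reload` field names on `struct nvgpu_channel_hw_state` are inferred from the comments above, and the error handling, disable/preempt, and HAL-availability checks are elided; this mirrors the documented flow, not the exact tsg.c code:

```c
/* Sketch only; field names are assumptions, see the lead-in above. */
static int unbind_hw_gate(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch,
		struct nvgpu_channel_hw_state *hw_state)
{
	if (hw_state->next) {
		/* Channel is slated to run next: refuse the unbind. */
		return -EINVAL;
	}
	if (hw_state->ctx_reload) {
		/* Hand the ctx_reload state to another channel in the TSG. */
		nvgpu_tsg_unbind_channel_check_ctx_reload(tsg, ch, hw_state);
	}
	return 0;
}
```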
+
 #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
 int nvgpu_tsg_force_reset_ch(struct nvgpu_channel *ch,
 		u32 err_code, bool verbose);
-#endif
 void nvgpu_tsg_post_event_id(struct nvgpu_tsg *tsg,
 		enum nvgpu_event_id_type event_id);
+#endif
 
+/**
+ * @brief Set the mmu fault error notifier for all the channels bound to a
+ *        TSG.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param tsg[in]	Pointer to TSG struct.
+ *
+ * Set the mmu fault error notifier for all the channels bound to the TSG.
+ */
 void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_tsg *tsg);
 
+/**
+ * @brief Mark an error on all the referenceable channels of the TSG's
+ *        channel list.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param tsg[in]	Pointer to TSG struct.
+ *
+ * - Set the verbose local variable to false.
+ * - Acquire #nvgpu_tsg.ch_list_lock of the TSG.
+ * - For each entry of the channels in #nvgpu_tsg.ch_list of the TSG,
+ * -- Get a reference to the channel.
+ * -- If the channel is referenceable,
+ * --- Call #nvgpu_channel_mark_error and set the verbose local variable to
+ *     true if the return value of this function is true.
+ * --- Put the reference to the channel.
+ * - Release #nvgpu_tsg.ch_list_lock of the TSG.
+ *
+ * @return The verbose bool variable. This is used to decide if the driver
+ *         needs to dump debug info. It can be either true or false.
+ */
 bool nvgpu_tsg_mark_error(struct gk20a *g, struct nvgpu_tsg *tsg);
 
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg,
 		bool *debug_dump, u32 *ms);
 #endif
-int nvgpu_tsg_set_runlist_interleave(struct nvgpu_tsg *tsg, u32 level);
 #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
 int nvgpu_tsg_set_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us);
 u32 nvgpu_tsg_get_timeslice(struct nvgpu_tsg *tsg);
@@ -141,9 +487,56 @@ int nvgpu_tsg_set_priority(struct gk20a *g, struct nvgpu_tsg *tsg,
 		u32 priority);
 int nvgpu_tsg_set_interleave(struct nvgpu_tsg *tsg, u32 level);
 #endif
+/**
+ * @brief Get the default TSG timeslice in us as defined by the nvgpu driver.
+ *
+ * @param g[in]	The GPU driver struct.
+ *
+ * Get the TSG timeslice value in microseconds. This is the default timeslice
+ * value in us as defined by s/w.
+ *
+ * @return The s/w defined default TSG timeslice value in us.
+ */
 u32 nvgpu_tsg_default_timeslice_us(struct gk20a *g);
+
+/**
+ * @brief Enable the h/w runlist scheduler corresponding to the runlist_id
+ *        of the TSG.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param tsg[in]	Pointer to the TSG struct.
+ *
+ * Enable the h/w runlist scheduler for #nvgpu_tsg.runlist_id.
+ */
 void nvgpu_tsg_enable_sched(struct gk20a *g, struct nvgpu_tsg *tsg);
+
+/**
+ * @brief Disable the h/w runlist scheduler corresponding to the runlist_id
+ *        of the TSG.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param tsg[in]	Pointer to the TSG struct.
+ *
+ * Disable the h/w runlist scheduler for #nvgpu_tsg.runlist_id.
+ */
 void nvgpu_tsg_disable_sched(struct gk20a *g, struct nvgpu_tsg *tsg);
+
+/**
+ * @brief Allocate zero initialized memory to store SM errors.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param tsg[in]	Pointer to the TSG struct.
+ * @param num_sm[in]	Total number of SMs supported by h/w.
+ *
+ * Allocate zero initialized memory to store the SM errors for all the SMs
+ * supported by h/w.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ * @retval -EINVAL if memory is already allocated to store the
+ *         SM error states.
+ * @retval -ENOMEM if memory could not be allocated to store the
+ *         SM error states.
+ */
 int nvgpu_tsg_alloc_sm_error_states_mem(struct gk20a *g,
 		struct nvgpu_tsg *tsg,
 		u32 num_sm);
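The enable/disable pair above naturally brackets work that must not race the h/w scheduler. A sketch; `quiesce_and_update()` and the middle step are hypothetical:

```c
/* Hypothetical bracket around per-TSG runlist surgery. */
static void quiesce_and_update(struct gk20a *g, struct nvgpu_tsg *tsg)
{
	nvgpu_tsg_disable_sched(g, tsg);
	/* ... modify per-TSG runlist state here ... */
	nvgpu_tsg_enable_sched(g, tsg);
}
```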
@@ -152,6 +545,7 @@ int nvgpu_tsg_set_sm_exception_type_mask(struct nvgpu_channel *ch,
 		u32 exception_mask);
 #endif
 
+#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
 struct gk20a_event_id_data {
 	struct gk20a *g;
@@ -172,12 +566,59 @@ gk20a_event_id_data_from_event_id_node(struct nvgpu_list_node *node)
 	return (struct gk20a_event_id_data *)
 		((uintptr_t)node - offsetof(struct gk20a_event_id_data,
 			event_id_node));
 };
+#endif
 
+/**
+ * @brief Set the error notifier for all the channels bound to a TSG.
+ *
+ * @param g[in]			The GPU driver struct.
+ * @param tsg[in]		Pointer to TSG struct.
+ * @param error_notifier[in]	Error notifier defined by s/w.
+ *
+ * Set the error notifier for all the channels bound to the TSG.
+ * See include/nvgpu/error_notifier.h.
+ */
 void nvgpu_tsg_set_error_notifier(struct gk20a *g, struct nvgpu_tsg *tsg,
 		u32 error_notifier);
 
+#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 bool nvgpu_tsg_ctxsw_timeout_debug_dump_state(struct nvgpu_tsg *tsg);
 
 void nvgpu_tsg_set_ctxsw_timeout_accumulated_ms(struct nvgpu_tsg *tsg, u32 ms);
+#endif
+
+/**
+ * @brief Abort all the channels bound to the TSG.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param tsg[in]	Pointer to TSG struct.
+ * @param preempt[in]	Flag to ask for preempting the TSG.
+ *
+ * - Disable all the channels bound to the #tsg so that h/w does not schedule
+ *   them.
+ * - Preempt the #tsg if the #preempt flag is set. This is to offload all the
+ *   channels bound to the #tsg from the pbdma/engines.
+ * - Set #nvgpu_channel.unserviceable for all the channels bound to the #tsg
+ *   to let s/w know of the bad state of the channels.
+ * - Do the s/w formalities so that the channels bound to the #tsg are ready
+ *   to be closed by userspace.
+ */
 void nvgpu_tsg_abort(struct gk20a *g, struct nvgpu_tsg *tsg, bool preempt);
+
+/**
+ * @brief Clear the h/w bits PBDMA_FAULTED and ENG_FAULTED in the CCSR
+ *        channel h/w register for all the channels bound to the TSG.
+ *
+ * @param g[in]		The GPU driver struct.
+ * @param tsg[in]	Pointer to TSG struct.
+ * @param eng[in]	Flag to ask for clearing the ENG_FAULTED h/w bit.
+ * @param pbdma[in]	Flag to ask for clearing the PBDMA_FAULTED h/w bit.
+ *
+ * If the chip supports the h/w bits PBDMA_FAULTED and ENG_FAULTED and #tsg
+ * is non-NULL, clear the PBDMA_FAULTED bit in the CCSR channel h/w register
+ * if #pbdma is set, and clear the ENG_FAULTED bit in the CCSR channel h/w
+ * register if #eng is set.
+ * For chips that do not support these h/w bits, just return. Also return if
+ * the #tsg input param is NULL.
+ */
 void nvgpu_tsg_reset_faulted_eng_pbdma(struct gk20a *g, struct nvgpu_tsg *tsg,
 		bool eng, bool pbdma);
 
 #ifdef CONFIG_NVGPU_DEBUGGER
diff --git a/drivers/gpu/nvgpu/os/posix/posix-tsg.c b/drivers/gpu/nvgpu/os/posix/posix-tsg.c
index aaac2f84f..154b62bf9 100644
--- a/drivers/gpu/nvgpu/os/posix/posix-tsg.c
+++ b/drivers/gpu/nvgpu/os/posix/posix-tsg.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -23,7 +23,9 @@
 #include
 #include
 
+#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
 void nvgpu_tsg_post_event_id(struct nvgpu_tsg *tsg,
 		enum nvgpu_event_id_type event_id)
 {
 }
+#endif
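Finally, the error-handling declarations added to tsg.h compose in the same order that nvgpu_tsg_unbind_channel()'s failure path in tsg.c uses them. A sketch; `handle_tsg_fault()` and the choice of notifier are illustrative, while the three `nvgpu_tsg_*` calls come from this patch:

```c
/* Hypothetical recovery flow built from the declarations above. */
static void handle_tsg_fault(struct gk20a *g, struct nvgpu_tsg *tsg,
		u32 error_notifier)
{
	bool verbose;

	/* Post the notifier on every bound channel first. */
	nvgpu_tsg_set_error_notifier(g, tsg, error_notifier);

	/* Mark an error on each referenceable channel; the return value
	 * says whether debug state should be dumped. */
	verbose = nvgpu_tsg_mark_error(g, tsg);

	/* preempt=true pulls the channels off the PBDMAs/engines. */
	nvgpu_tsg_abort(g, tsg, true);

	if (verbose) {
		/* ... dump debug info ... */
	}
}
```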