mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
Add usermode_gpfifo member to channel to store the usermode-owned buffer data. This makes it more explicit which memory (user or kernel) is accessed.

Jira NVGPU-967

Change-Id: I98e4f4568f04aa31e00e497794fadd4997ba8aab
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1807503
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
410 lines
12 KiB
C
410 lines
12 KiB
C
/*
 * GK20A graphics channel
 *
 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
|
|
#ifndef CHANNEL_GK20A_H
|
|
#define CHANNEL_GK20A_H
|
|
|
|
#include <nvgpu/list.h>
|
|
#include <nvgpu/lock.h>
|
|
#include <nvgpu/timers.h>
|
|
#include <nvgpu/cond.h>
|
|
#include <nvgpu/atomic.h>
|
|
|
|
#include "gr_gk20a.h"
|
|
|
|
/*
 * Opaque forward declarations. Nothing in this header needs the full
 * definition of these types; where they are used at all, it is by pointer.
 */
struct gk20a;
struct dbg_session_gk20a;
struct gk20a_fence;
struct fifo_profile_gk20a;
struct gk20a_channel_sync;
|
|
|
|
/*
 * Flags to be passed to gk20a_channel_alloc_gpfifo().
 *
 * Each flag occupies its own bit, so flags can be OR'ed together.
 */
#define NVGPU_GPFIFO_FLAGS_SUPPORT_VPR			(1U << 0U)
#define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC	(1U << 1U)
#define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1U << 2U)
#define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT		(1U << 3U)
|
|
|
|
/*
 * Flags to be passed to nvgpu_submit_channel_gpfifo().
 *
 * Each flag occupies its own bit, so flags can be OR'ed together.
 */
#define NVGPU_SUBMIT_FLAGS_FENCE_WAIT			(1U << 0U)
#define NVGPU_SUBMIT_FLAGS_FENCE_GET			(1U << 1U)
#define NVGPU_SUBMIT_FLAGS_HW_FORMAT			(1U << 2U)
#define NVGPU_SUBMIT_FLAGS_SYNC_FENCE			(1U << 3U)
#define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI			(1U << 4U)
#define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING	(1U << 5U)
|
|
|
|
/*
|
|
* The binary format of 'struct nvgpu_channel_fence' introduced here
|
|
* should match that of 'struct nvgpu_fence' defined in uapi header, since
|
|
* this struct is intended to be a mirror copy of the uapi struct. This is
|
|
* not a hard requirement though because of nvgpu_get_fence_args conversion
|
|
* function.
|
|
*/
|
|
struct nvgpu_channel_fence {
|
|
u32 id;
|
|
u32 value;
|
|
};
|
|
|
|
/*
|
|
* The binary format of 'struct nvgpu_gpfifo_entry' introduced here
|
|
* should match that of 'struct nvgpu_gpfifo' defined in uapi header, since
|
|
* this struct is intended to be a mirror copy of the uapi struct. This is
|
|
* a rigid requirement because there's no conversion function and there are
|
|
* memcpy's present between the user gpfifo (of type nvgpu_gpfifo) and the
|
|
* kern gpfifo (of type nvgpu_gpfifo_entry).
|
|
*/
|
|
struct nvgpu_gpfifo_entry {
|
|
u32 entry0;
|
|
u32 entry1;
|
|
};
|
|
|
|
struct nvgpu_gpfifo_args {
|
|
u32 num_entries;
|
|
u32 num_inflight_jobs;
|
|
u32 userd_dmabuf_fd;
|
|
u32 gpfifo_dmabuf_fd;
|
|
u32 work_submit_token;
|
|
u32 flags;
|
|
};
|
|
|
|
struct notification {
|
|
struct {
|
|
u32 nanoseconds[2];
|
|
} timestamp;
|
|
u32 info32;
|
|
u16 info16;
|
|
u16 status;
|
|
};
|
|
|
|
struct channel_gk20a_job {
|
|
struct nvgpu_mapped_buf **mapped_buffers;
|
|
int num_mapped_buffers;
|
|
struct gk20a_fence *post_fence;
|
|
struct priv_cmd_entry *wait_cmd;
|
|
struct priv_cmd_entry *incr_cmd;
|
|
struct nvgpu_list_node list;
|
|
};
|
|
|
|
static inline struct channel_gk20a_job *
|
|
channel_gk20a_job_from_list(struct nvgpu_list_node *node)
|
|
{
|
|
return (struct channel_gk20a_job *)
|
|
((uintptr_t)node - offsetof(struct channel_gk20a_job, list));
|
|
};
|
|
|
|
struct channel_gk20a_joblist {
|
|
struct {
|
|
bool enabled;
|
|
unsigned int length;
|
|
unsigned int put;
|
|
unsigned int get;
|
|
struct channel_gk20a_job *jobs;
|
|
struct nvgpu_mutex read_lock;
|
|
} pre_alloc;
|
|
|
|
struct {
|
|
struct nvgpu_list_node jobs;
|
|
struct nvgpu_spinlock lock;
|
|
} dynamic;
|
|
|
|
/*
|
|
* Synchronize abort cleanup (when closing a channel) and job cleanup
|
|
* (asynchronously from worker) - protect from concurrent access when
|
|
* job resources are being freed.
|
|
*/
|
|
struct nvgpu_mutex cleanup_lock;
|
|
};
|
|
|
|
struct channel_gk20a_timeout {
|
|
/* lock protects the running timer state */
|
|
struct nvgpu_raw_spinlock lock;
|
|
struct nvgpu_timeout timer;
|
|
bool running;
|
|
u32 gp_get;
|
|
u64 pb_get;
|
|
|
|
/* lock not needed */
|
|
u32 limit_ms;
|
|
bool enabled;
|
|
bool debug_dump;
|
|
};
|
|
|
|
/*
 * Track refcount actions, saving their stack traces. This number specifies how
 * many most recent actions are stored in a buffer. Set to 0 to disable. 128
 * should be enough to track moderately hard problems from the start.
 *
 * The value is also used in '#if' tests in this header, so it must remain a
 * plain integer constant.
 */
#define GK20A_CHANNEL_REFCOUNT_TRACKING 0
/* Stack depth for the saved actions. */
#define GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN 8
|
|
|
|
/*
 * Because the puts and gets are not linked together explicitly (although they
 * should always come in pairs), it's not possible to tell which ref holder to
 * delete from the list when doing a put. So, just store some number of most
 * recent gets and puts in a ring buffer, to obtain a history.
 *
 * These are zeroed when a channel is closed, so a new one starts fresh.
 */

/* Kind of refcount action recorded in the history: a get or a put. */
enum channel_gk20a_ref_action_type {
	channel_gk20a_ref_action_get,
	channel_gk20a_ref_action_put
};
|
|
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING

#include <linux/stacktrace.h>

/*
 * One recorded refcount action: its type, a millisecond timestamp and the
 * stack trace captured for it. Only compiled when
 * GK20A_CHANNEL_REFCOUNT_TRACKING is non-zero.
 */
struct channel_gk20a_ref_action {
	enum channel_gk20a_ref_action_type type;
	s64 timestamp_ms;
	/*
	 * Many of these traces will be similar. Simpler to just capture
	 * duplicates than to have a separate database for the entries.
	 */
	struct stack_trace trace;
	unsigned long trace_entries[GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN];
};
#endif
|
|
|
|
/* this is the priv element of struct nvhost_channel */
|
|
struct channel_gk20a {
|
|
struct gk20a *g; /* set only when channel is active */
|
|
|
|
struct nvgpu_list_node free_chs;
|
|
|
|
struct nvgpu_spinlock ref_obtain_lock;
|
|
nvgpu_atomic_t ref_count;
|
|
struct nvgpu_cond ref_count_dec_wq;
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
|
/*
|
|
* Ring buffer for most recent refcount gets and puts. Protected by
|
|
* ref_actions_lock when getting or putting refs (i.e., adding
|
|
* entries), and when reading entries.
|
|
*/
|
|
struct channel_gk20a_ref_action ref_actions[
|
|
GK20A_CHANNEL_REFCOUNT_TRACKING];
|
|
size_t ref_actions_put; /* index of next write */
|
|
struct nvgpu_spinlock ref_actions_lock;
|
|
#endif
|
|
|
|
struct nvgpu_semaphore_int *hw_sema;
|
|
|
|
nvgpu_atomic_t bound;
|
|
|
|
int chid;
|
|
int tsgid;
|
|
pid_t pid;
|
|
pid_t tgid;
|
|
struct nvgpu_mutex ioctl_lock;
|
|
|
|
struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
|
|
|
|
struct channel_gk20a_joblist joblist;
|
|
struct nvgpu_allocator fence_allocator;
|
|
|
|
struct vm_gk20a *vm;
|
|
|
|
struct gpfifo_desc gpfifo;
|
|
|
|
struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
|
|
struct nvgpu_mem usermode_gpfifo;
|
|
struct nvgpu_mem inst_block;
|
|
|
|
u64 userd_iova;
|
|
u64 userd_gpu_va;
|
|
|
|
struct priv_cmd_queue priv_cmd_q;
|
|
|
|
struct nvgpu_cond notifier_wq;
|
|
struct nvgpu_cond semaphore_wq;
|
|
|
|
/* kernel watchdog to kill stuck jobs */
|
|
struct channel_gk20a_timeout timeout;
|
|
|
|
/* for job cleanup handling in the background worker */
|
|
struct nvgpu_list_node worker_item;
|
|
|
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
|
struct {
|
|
void *cyclestate_buffer;
|
|
u32 cyclestate_buffer_size;
|
|
struct nvgpu_mutex cyclestate_buffer_mutex;
|
|
} cyclestate;
|
|
|
|
struct nvgpu_mutex cs_client_mutex;
|
|
struct gk20a_cs_snapshot_client *cs_client;
|
|
#endif
|
|
struct nvgpu_mutex dbg_s_lock;
|
|
struct nvgpu_list_node dbg_s_list;
|
|
|
|
struct nvgpu_mutex sync_lock;
|
|
struct gk20a_channel_sync *sync;
|
|
struct gk20a_channel_sync *user_sync;
|
|
|
|
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
|
|
u64 virt_ctx;
|
|
#endif
|
|
|
|
struct ctx_header_desc ctx_header;
|
|
|
|
/* Any operating system specific data. */
|
|
void *os_priv;
|
|
|
|
u32 obj_class; /* we support only one obj per channel */
|
|
|
|
u32 timeout_accumulated_ms;
|
|
u32 timeout_gpfifo_get;
|
|
|
|
u32 subctx_id;
|
|
u32 runqueue_sel;
|
|
|
|
u32 timeout_ms_max;
|
|
u32 runlist_id;
|
|
|
|
bool mmu_nack_handled;
|
|
bool has_timedout;
|
|
bool referenceable;
|
|
bool vpr;
|
|
bool deterministic;
|
|
/* deterministic, but explicitly idle and submits disallowed */
|
|
bool deterministic_railgate_allowed;
|
|
bool cde;
|
|
bool usermode_submit_enabled;
|
|
bool timeout_debug_dump;
|
|
bool has_os_fence_framework_support;
|
|
|
|
bool is_privileged_channel;
|
|
};
|
|
|
|
static inline struct channel_gk20a *
|
|
channel_gk20a_from_free_chs(struct nvgpu_list_node *node)
|
|
{
|
|
return (struct channel_gk20a *)
|
|
((uintptr_t)node - offsetof(struct channel_gk20a, free_chs));
|
|
};
|
|
|
|
static inline struct channel_gk20a *
|
|
channel_gk20a_from_ch_entry(struct nvgpu_list_node *node)
|
|
{
|
|
return (struct channel_gk20a *)
|
|
((uintptr_t)node - offsetof(struct channel_gk20a, ch_entry));
|
|
};
|
|
|
|
static inline struct channel_gk20a *
|
|
channel_gk20a_from_worker_item(struct nvgpu_list_node *node)
|
|
{
|
|
return (struct channel_gk20a *)
|
|
((uintptr_t)node - offsetof(struct channel_gk20a, worker_item));
|
|
};
|
|
|
|
static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
|
|
{
|
|
return !!ch->vm;
|
|
}
|
|
int channel_gk20a_commit_va(struct channel_gk20a *c);
|
|
int gk20a_init_channel_support(struct gk20a *, u32 chid);
|
|
|
|
/* must be inside gk20a_busy()..gk20a_idle() */
|
|
void gk20a_channel_close(struct channel_gk20a *ch);
|
|
void __gk20a_channel_kill(struct channel_gk20a *ch);
|
|
|
|
bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
|
|
u32 timeout_delta_ms, bool *progress);
|
|
void gk20a_disable_channel(struct channel_gk20a *ch);
|
|
void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt);
|
|
void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
|
|
void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
|
|
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
|
|
struct priv_cmd_entry *entry);
|
|
int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);
|
|
|
|
/* Enable/disable a channel through its TSG (defined outside this header). */
int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);

/* Device-wide channel suspend/resume; both take only the gk20a instance. */
int gk20a_channel_suspend(struct gk20a *g);
int gk20a_channel_resume(struct gk20a *g);

/* Device-wide idle/unidle for deterministic channels. */
void gk20a_channel_deterministic_idle(struct gk20a *g);
void gk20a_channel_deterministic_unidle(struct gk20a *g);

/* Setup and teardown of the channel worker. */
int nvgpu_channel_worker_init(struct gk20a *g);
void nvgpu_channel_worker_deinit(struct gk20a *g);

/*
 * NOTE(review): whether gk20a_get_channel_from_file() returns a referenced
 * channel, and what it returns on failure, is not visible here - confirm.
 */
struct channel_gk20a *gk20a_get_channel_from_file(int fd);
void gk20a_channel_update(struct channel_gk20a *c);
|
|
|
|
/* returns ch if reference was obtained */
|
|
struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
|
|
const char *caller);
|
|
#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)
|
|
|
|
|
|
void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
|
|
#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)
|
|
|
|
int gk20a_wait_channel_idle(struct channel_gk20a *ch);
|
|
|
|
/* runlist_id -1 is synonym for ENGINE_GR_GK20A runlist id */
|
|
struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
|
|
s32 runlist_id,
|
|
bool is_privileged_channel,
|
|
pid_t pid, pid_t tid);
|
|
|
|
int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
|
|
struct nvgpu_gpfifo_args *gpfifo_args);
|
|
|
|
void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
|
|
|
|
/* Job list queries and locking (defined outside this header). */
bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
void channel_gk20a_joblist_lock(struct channel_gk20a *c);
void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);

int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add);
/*
 * Results are returned through the two pointer parameters.
 * NOTE(review): the '__' prefixed parameter names use identifiers reserved
 * for the implementation; they are left as-is here to stay identical to the
 * definition - consider renaming both together.
 */
int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
		unsigned int timeslice_period,
		unsigned int *__timeslice_timeout, unsigned int *__timeslice_scale);
|
|
|
|
void gk20a_wait_until_counter_is_N(
|
|
struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
|
|
struct nvgpu_cond *c, const char *caller, const char *counter_name);
|
|
int channel_gk20a_alloc_job(struct channel_gk20a *c,
|
|
struct channel_gk20a_job **job_out);
|
|
void channel_gk20a_free_job(struct channel_gk20a *c,
|
|
struct channel_gk20a_job *job);
|
|
u32 nvgpu_get_gp_free_count(struct channel_gk20a *c);
|
|
u32 nvgpu_gp_free_count(struct channel_gk20a *c);
|
|
int gk20a_channel_add_job(struct channel_gk20a *c,
|
|
struct channel_gk20a_job *job,
|
|
bool skip_buffer_refcounting);
|
|
void free_priv_cmdbuf(struct channel_gk20a *c,
|
|
struct priv_cmd_entry *e);
|
|
void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
|
|
bool clean_all);
|
|
|
|
void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c);
|
|
u32 nvgpu_get_gpfifo_entry_size(void);
|
|
|
|
#endif /* CHANNEL_GK20A_H */
|