Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: sema based gpfifo submission tracking
Implement a hw semaphore that is used to track gpfifo submission. This implementation is used when userd.gp_get() is not defined and the feature flag NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET is set.

At the end of each submitted job, a semaphore release is appended to write the gpfifo get pointer to the hw semaphore address. When the next job submission is processed, gpfifo.get is read back from that designated hw semaphore location.

JIRA NVGPU-9588

Change-Id: Ic88ace1a3f60e3f38f159e1861464ebcaea04469
Signed-off-by: Ramalingam C <ramalingamc@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2898143
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-by: Martin Radev <mradev@nvidia.com>
Reviewed-by: Ankur Kishore <ankkishore@nvidia.com>
Tested-by: Martin Radev <mradev@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
commit ad320f60b9
parent 32f6f9c5d0
committed by: mobile promotions
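In short: when USERD no longer reports gp_get, the driver appends one extra semaphore release per job that writes the post-job gpfifo get pointer into a per-channel hw semaphore, and later submissions read gpfifo.get back from that word. The sketch below is only a toy model of that idea for illustration; every name in it (channel_model, gpu_release_gpfifo_sema, channel_read_gpfifo_get) is made up and is not an nvgpu API. The actual driver changes are in the diff that follows.

/*
 * Toy model of sema-based gpfifo.get tracking: the "GPU" releases a
 * semaphore word carrying the gpfifo get pointer at the end of each job,
 * and the driver reads that word back when USERD gp_get is unavailable.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct channel_model {
	uint32_t entry_num;      /* gpfifo ring size (power of two) */
	uint32_t userd_gp_get;   /* what USERD would report, if supported */
	uint32_t hw_sema_value;  /* semaphore word written by the GPU */
	bool userd_gp_get_valid; /* models g->ops.userd.gp_get != NULL */
};

/* GPU side: the incr cmd appended after a job writes the new get pointer. */
static void gpu_release_gpfifo_sema(struct channel_model *ch, uint32_t new_get)
{
	ch->hw_sema_value = new_get & (ch->entry_num - 1U);
}

/* Driver side: prefer USERD, otherwise fall back to the tracking semaphore. */
static uint32_t channel_read_gpfifo_get(const struct channel_model *ch)
{
	if (ch->userd_gp_get_valid) {
		return ch->userd_gp_get;
	}
	return ch->hw_sema_value;
}

int main(void)
{
	struct channel_model ch = { .entry_num = 128U };

	gpu_release_gpfifo_sema(&ch, 5U); /* a job of 5 entries completed */
	printf("gpfifo.get = %u\n", channel_read_gpfifo_get(&ch));
	return 0;
}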
@@ -251,6 +251,10 @@ static void channel_kernelmode_deinit(struct nvgpu_channel *ch)
nvgpu_channel_sync_destroy(ch->sync);
ch->sync = NULL;
}
if (ch->gpfifo_sync != NULL) {
nvgpu_channel_sync_destroy(ch->gpfifo_sync);
ch->gpfifo_sync = NULL;
}
nvgpu_mutex_release(&ch->sync_lock);
}

@@ -370,6 +374,18 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
nvgpu_mutex_release(&c->sync_lock);
goto clean_up_unmap;
}

if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET)) {
c->gpfifo_sync = nvgpu_channel_sync_semaphore_create(c);
if (c->gpfifo_sync == NULL) {
err = -ENOMEM;
goto clean_up_sync;
}
nvgpu_mutex_acquire(&c->gpfifo_hw_sema_lock);
nvgpu_channel_sync_hw_semaphore_init(c->gpfifo_sync);
nvgpu_mutex_release(&c->gpfifo_hw_sema_lock);
}

nvgpu_mutex_release(&c->sync_lock);

if (g->ops.channel.set_syncpt != NULL) {
@@ -431,6 +447,10 @@ clean_up_priv_cmd:
clean_up_prealloc:
nvgpu_channel_joblist_deinit(c);
clean_up_sync:
if (c->gpfifo_sync != NULL) {
nvgpu_channel_sync_destroy(c->gpfifo_sync);
c->gpfifo_sync = NULL;
}
if (c->sync != NULL) {
nvgpu_channel_sync_destroy(c->sync);
c->sync = NULL;

@@ -448,9 +468,9 @@ clean_up:
}

/* Update with this periodically to determine how the gpfifo is draining. */
static inline u32 channel_update_gpfifo_get(struct gk20a *g,
struct nvgpu_channel *c)
static inline u32 channel_update_gpfifo_get(struct nvgpu_channel *c)
{
struct gk20a *g = c->g;
u32 new_get = 0U;

if (g->ops.userd.gp_get != NULL) {
@@ -469,7 +489,7 @@ u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch)

u32 nvgpu_channel_update_gpfifo_get_and_get_free_count(struct nvgpu_channel *ch)
{
(void)channel_update_gpfifo_get(ch->g, ch);
(void)channel_update_gpfifo_get(ch);
return nvgpu_channel_get_gpfifo_free_count(ch);
}

@@ -514,6 +534,9 @@ static void nvgpu_channel_finalize_job(struct nvgpu_channel *c,
* semaphore or even a syncfd.
*/
nvgpu_fence_put(&job->post_fence);
if (job->gpfifo_sema != NULL) {
nvgpu_semaphore_put(job->gpfifo_sema);
}

/*
* Free the private command buffers (in order of allocation)

@@ -522,6 +545,9 @@ static void nvgpu_channel_finalize_job(struct nvgpu_channel *c,
nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->wait_cmd);
}
nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->incr_cmd);
if (job->gpfifo_incr_cmd != NULL) {
nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->gpfifo_incr_cmd);
}

nvgpu_channel_free_job(c, job);
@@ -590,9 +616,22 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c)

WARN_ON(c->sync == NULL);

if (c->gpfifo_sync != NULL) {
if (g->aggressive_sync_destroy_thresh != 0U) {
nvgpu_mutex_acquire(&c->sync_lock);
if (nvgpu_channel_sync_put_ref_and_check(c->gpfifo_sync)
&& g->aggressive_sync_destroy) {
nvgpu_channel_sync_destroy(c->gpfifo_sync);
c->gpfifo_sync = NULL;
}
nvgpu_mutex_release(&c->sync_lock);
}
}

if (c->sync != NULL) {
if (c->has_os_fence_framework_support &&
g->os_channel.os_fence_framework_inst_exists(c)) {
g->os_channel.os_fence_framework_inst_exists(c) &&
!nvgpu_has_syncpoints(g)) {
g->os_channel.signal_os_fence_framework(c,
&job->post_fence);
}

@@ -689,7 +728,7 @@ bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch,
goto done;
}

gpfifo_get = channel_update_gpfifo_get(ch->g, ch);
gpfifo_get = channel_update_gpfifo_get(ch);

if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) {
/* didn't advance since previous ctxsw timeout check */
@@ -1042,6 +1081,9 @@ unbind:

#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
WARN_ON(ch->sync != NULL);
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET)) {
WARN_ON(ch->gpfifo_sync != NULL);
}
#endif

channel_free_unlink_debug_session(ch);

@@ -1751,6 +1793,7 @@ static void nvgpu_channel_destroy(struct nvgpu_channel *c)
nvgpu_mutex_destroy(&c->ioctl_lock);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
nvgpu_mutex_destroy(&c->gpfifo_hw_sema_lock);
#endif
nvgpu_mutex_destroy(&c->sync_lock);
#if defined(CONFIG_NVGPU_CYCLESTATS)

@@ -1815,6 +1858,7 @@ int nvgpu_channel_init_support(struct gk20a *g, u32 chid)
nvgpu_init_list_node(&c->worker_item);

nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
nvgpu_mutex_init(&c->gpfifo_hw_sema_lock);

#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
nvgpu_mutex_init(&c->ioctl_lock);
@@ -67,6 +67,7 @@ int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
int err = 0;
u32 wait_size, incr_size;
u32 mem_per_job;
u32 gpfifo_incr_size = 0;

/*
* sema size is at least as much as syncpt size, but semas may not be

@@ -77,6 +78,9 @@ int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
wait_size = g->ops.sync.sema.get_wait_cmd_size();
incr_size = g->ops.sync.sema.get_incr_cmd_size();
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET)) {
gpfifo_incr_size = g->ops.sync.sema.get_incr_cmd_size();
}
#else
wait_size = g->ops.sync.syncpt.get_wait_cmd_size();
incr_size = g->ops.sync.syncpt.get_incr_cmd_size(true);
@@ -84,22 +88,24 @@ int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,

/*
* Compute the amount of priv_cmdbuf space we need. In general the
* worst case is the kernel inserts both a semaphore pre-fence and
* post-fence. Any sync-pt fences will take less memory so we can
* ignore them unless they're the only supported type. Jobs can also
* have more than one pre-fence but that's abnormal and we'll -EAGAIN
* if such jobs would fill the queue.
* worst case is the kernel inserts both a semaphore pre-fence,
* post-fence and semaphore for gp.get tracking. Any sync-pt fences
* will take less memory so we can ignore them unless they're the only
* supported type. Jobs can also have more than one pre-fence but
* that's abnormal and we'll -EAGAIN if such jobs would fill the queue.
*
* A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
* semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be
* 10 words: all the same as an ACQ plus a non-stalling intr which is
* another 2 words. In reality these numbers vary by chip but we'll use
* 8 and 10 as examples.
* another 2 words. Semaphore for updating the gp.get also needs same
* as A semaphore INCR (fence-get) i.e 10 words. In reality these
* numbers vary by chip but we'll use 8, 10 and 10 as examples.
*
* Given the job count, cmdbuf space is allocated such that each job
* can get one wait command and one increment command:
* can get one wait command, one increment command and a semaphore for
* gp.get tracking
*
* job_count * (8 + 10) * 4 bytes
* job_count * (8 + 10 + 10) * 4 bytes
*
* These cmdbufs are inserted as gpfifo entries right before and after
* the user submitted gpfifo entries per submit.

@@ -109,13 +115,15 @@ int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
* is full when the number of consumed entries is one less than the
* allocation size:
*
* alloc bytes = job_count * (wait + incr + 1) * slot in bytes
* alloc bytes = job_count * (wait + incr + gpfifo_incr + 1) * slot
* in bytes
*/
mem_per_job = nvgpu_safe_mult_u32(
mem_per_job = nvgpu_safe_add_u32(
nvgpu_safe_add_u32(
nvgpu_safe_add_u32(wait_size, incr_size),
1U),
(u32)sizeof(u32));
gpfifo_incr_size), 1U);
mem_per_job = nvgpu_safe_mult_u32(mem_per_job, (u32)sizeof(u32));

/* both 32 bit and mem_per_job is small */
size = nvgpu_safe_mult_u64((u64)job_count, (u64)mem_per_job);
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),

@@ -39,6 +39,7 @@
#include <nvgpu/trace.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/user_fence.h>
#include <nvgpu/channel_sync_semaphore.h>

#include <nvgpu/fifo/swprofile.h>

@@ -106,7 +107,7 @@ static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
struct nvgpu_channel_fence *fence,
struct nvgpu_channel_job *job,
u32 flags)
u32 flags, u32 gpfifo_entries)
{
struct gk20a *g = c->g;
bool need_sync_fence;

@@ -116,6 +117,8 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
bool sema_tracking = nvgpu_is_enabled(g,
NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET);

if (g->aggressive_sync_destroy_thresh != 0U) {
nvgpu_mutex_acquire(&c->sync_lock);
@@ -128,6 +131,20 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
new_sync_created = true;
}
nvgpu_channel_sync_get_ref(c->sync);

if (c->gpfifo_sync == NULL && sema_tracking) {
c->gpfifo_sync = nvgpu_channel_sync_semaphore_create(c);
if (c->gpfifo_sync == NULL) {
err = -ENOMEM;
goto clean_up_put_sync;
}
nvgpu_mutex_acquire(&c->gpfifo_hw_sema_lock);
nvgpu_channel_sync_hw_semaphore_init(c->gpfifo_sync);
nvgpu_mutex_release(&c->gpfifo_hw_sema_lock);
}
if (c->gpfifo_sync != NULL) {
nvgpu_channel_sync_get_ref(c->gpfifo_sync);
}
}

if ((g->ops.channel.set_syncpt != NULL) && new_sync_created) {

@@ -151,6 +168,7 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,

need_sync_fence = flag_fence_get && flag_sync_fence;

/*
* Always generate an increment at the end of a GPFIFO submission. When
* we do job tracking, post fences are needed for various reasons even
@@ -162,19 +180,41 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
goto clean_up_wait_cmd;
}

if (sema_tracking) {
err = nvgpu_submit_create_gpfifo_tracking_semaphore(
c->gpfifo_sync, &job->gpfifo_sema,
&job->gpfifo_incr_cmd,
nvgpu_safe_add_u32(gpfifo_entries,
(flag_fence_wait ? 3U : 2U)));
if (err != 0) {
goto clean_up_incr_cmd;
}
}

if (g->aggressive_sync_destroy_thresh != 0U) {
nvgpu_mutex_release(&c->sync_lock);
}
return 0;

clean_up_incr_cmd:
if (job->incr_cmd != NULL) {
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->incr_cmd);
job->incr_cmd = NULL;
}
clean_up_wait_cmd:
if (job->wait_cmd != NULL) {
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
}
job->wait_cmd = NULL;
}
clean_up_put_sync:
if (g->aggressive_sync_destroy_thresh != 0U) {
if (nvgpu_channel_sync_put_ref_and_check(c->sync)
if (c->gpfifo_sync != NULL &&
nvgpu_channel_sync_put_ref_and_check(c->gpfifo_sync)
&& g->aggressive_sync_destroy) {
nvgpu_channel_sync_destroy(c->gpfifo_sync);
}
if (c->sync != NULL &&
nvgpu_channel_sync_put_ref_and_check(c->sync)
&& g->aggressive_sync_destroy) {
nvgpu_channel_sync_destroy(c->sync);
}
@@ -349,7 +389,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
return err;
}

err = nvgpu_submit_prepare_syncs(c, fence, job, flags);
err = nvgpu_submit_prepare_syncs(c, fence, job, flags, num_entries);
if (err != 0) {
goto clean_up_job;
}

@@ -369,9 +409,10 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
if (err != 0) {
goto clean_up_gpfifo_wait;
}

nvgpu_submit_append_priv_cmdbuf(c, job->incr_cmd);

if (c->gpfifo_sync != NULL) {
nvgpu_submit_append_priv_cmdbuf(c, job->gpfifo_incr_cmd);
}
err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting);
if (err != 0) {
goto clean_up_gpfifo_incr;
@@ -403,6 +444,17 @@ clean_up_gpfifo_incr:
nvgpu_safe_sub_u32(c->gpfifo.entry_num,
nvgpu_safe_add_u32(1U, num_entries)))) &
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);

/*
* undo the gpfifo incr priv cmdbuf which is similar to undo of
* wait_cmd priv cmdbuf.
*/
if (job->gpfifo_incr_cmd != NULL) {
c->gpfifo.put =
nvgpu_safe_add_u32(c->gpfifo.put,
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U)) &
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
}
clean_up_gpfifo_wait:
if (job->wait_cmd != NULL) {
/*

@@ -419,6 +471,9 @@ clean_up_gpfifo_wait:
}
nvgpu_fence_put(&job->post_fence);
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->incr_cmd);
if (job->gpfifo_incr_cmd != NULL) {
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->gpfifo_incr_cmd);
}
if (job->wait_cmd != NULL) {
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
}
@@ -1,7 +1,7 @@
/*
* Nvgpu Semaphores
*
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),

@@ -168,8 +168,29 @@ void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
hw_sema->chid, next);
}

void nvgpu_semaphore_prepare_for_gpfifo_get(struct nvgpu_channel *c,
struct nvgpu_semaphore *s, struct nvgpu_hw_semaphore *hw_sema,
u32 new_entries)
{
u32 next_get;

nvgpu_mutex_acquire(&c->gpfifo_hw_sema_lock);
next_get = nvgpu_safe_add_u32((u32)nvgpu_hw_semaphore_read_next(hw_sema),
new_entries) & nvgpu_safe_sub_u32(c->gpfifo.entry_num,
1U);
nvgpu_atomic_set(&hw_sema->next_value, (s32)next_get);
nvgpu_mutex_release(&c->gpfifo_hw_sema_lock);

WARN_ON(s->ready_to_wait);

nvgpu_atomic_set(&s->value, (s32)next_get);
s->ready_to_wait = true;

gpu_sema_verbose_dbg(s->g, "PREP sema for c=%d (%u)",
hw_sema->chid, next_get);
}

u64 nvgpu_semaphore_get_hw_pool_page_idx(struct nvgpu_semaphore *s)
{
return nvgpu_semaphore_pool_get_page_idx(s->location.pool);
}
@@ -159,6 +159,11 @@ void nvgpu_hw_semaphore_set(struct nvgpu_hw_semaphore *hw_sema, u32 val)
nvgpu_mem_wr(g, &pool->rw_mem, hw_sema->location.offset, val);
}

void nvgpu_hw_semaphore_init_next(struct nvgpu_hw_semaphore *hw_sema)
{
nvgpu_atomic_set(&hw_sema->next_value, 0);
}

int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema)
{
return nvgpu_atomic_read(&hw_sema->next_value);
@@ -1,7 +1,7 @@
/*
* GK20A Channel Synchronization Abstraction
*
* Copyright (c) 2014-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),

@@ -41,12 +41,6 @@

#include "channel_sync_priv.h"

struct nvgpu_channel_sync_semaphore {
struct nvgpu_channel_sync base;
struct nvgpu_channel *c;
struct nvgpu_hw_semaphore *hw_sema;
};

static struct nvgpu_channel_sync_semaphore *
nvgpu_channel_sync_semaphore_from_base(struct nvgpu_channel_sync *base)
{
@@ -113,6 +107,34 @@ static void add_sema_incr_cmd(struct gk20a *g, struct nvgpu_channel *c,
va, cmd);
}

static void add_sema_incr_cmd_to_write_next_get(struct nvgpu_channel *c,
struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd,
struct nvgpu_hw_semaphore *hw_sema,
u32 entries)
{
struct gk20a *g = c->g;
u32 ch = c->chid;
u64 va;

/* release will need to write back to the semaphore memory. */
va = nvgpu_semaphore_gpu_rw_va(s);

/* find the right sema next_value to write (like syncpt's max). */
nvgpu_semaphore_prepare_for_gpfifo_get(c, s, hw_sema, entries);

/*
* gp.get should be updated only when all the cmds are completed.
* Hence forcing the wfi to be true always.
*/
g->ops.sync.sema.add_incr_cmd(g, cmd, s, va, true);
gpu_sema_verbose_dbg(g, "(R) c=%u INCR %u (%u) pool=%-3llu"
"va=0x%llx entry=%p",
ch, nvgpu_semaphore_get_value(s),
nvgpu_semaphore_read(s),
nvgpu_semaphore_get_hw_pool_page_idx(s),
va, cmd);
}

static int channel_sync_semaphore_wait_fd(
struct nvgpu_channel_sync *s, int fd,
struct priv_cmd_entry **entry, u32 max_wait_cmds)
@@ -228,6 +250,42 @@ clean_up_sema:
return err;
}

s32 nvgpu_submit_create_gpfifo_tracking_semaphore(
struct nvgpu_channel_sync *s,
struct nvgpu_semaphore **semaphore,
struct priv_cmd_entry **incr_cmd,
u32 gpfifo_entries)
{
u32 incr_cmd_size;
struct nvgpu_channel_sync_semaphore *sp =
nvgpu_channel_sync_semaphore_from_base(s);
struct nvgpu_channel *c = sp->c;
s32 err = 0;

*semaphore = nvgpu_semaphore_alloc(sp->hw_sema);
if (*semaphore == NULL) {
nvgpu_err(c->g,
"ran out of semaphores");
return -ENOMEM;
}

incr_cmd_size = c->g->ops.sync.sema.get_incr_cmd_size();
err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q, incr_cmd_size, incr_cmd);
if (err != 0) {
goto clean_up_sema;
}

/* Release the completion semaphore. */
add_sema_incr_cmd_to_write_next_get(c, *semaphore, *incr_cmd,
sp->hw_sema, gpfifo_entries);

return 0;

clean_up_sema:
nvgpu_semaphore_put(*semaphore);
return err;
}

static int channel_sync_semaphore_incr(
struct nvgpu_channel_sync *s,
struct priv_cmd_entry **entry,
@@ -396,3 +454,22 @@ err_free_sema:
nvgpu_kfree(g, sema);
return NULL;
}

void nvgpu_channel_sync_hw_semaphore_init(struct nvgpu_channel_sync *sync)
{
struct nvgpu_channel_sync_semaphore *sp =
nvgpu_channel_sync_semaphore_from_base(sync);

nvgpu_hw_semaphore_set(sp->hw_sema, 0);
nvgpu_hw_semaphore_init_next(sp->hw_sema);
}

void nvgpu_channel_update_gpfifo_get(struct nvgpu_channel *c)
{
struct nvgpu_channel_sync_semaphore *sp;

if (c->gpfifo_sync != NULL) {
sp = nvgpu_channel_sync_semaphore_from_base(c->gpfifo_sync);
c->gpfifo.get = nvgpu_hw_semaphore_read(sp->hw_sema);
}
}
@@ -28,6 +28,7 @@
#include <nvgpu/channel.h>

#include <nvgpu/hw/gv11b/hw_ram_gv11b.h>
#include <nvgpu/channel_sync_semaphore.h>

#include "userd_gv11b.h"

@@ -35,8 +36,20 @@ u32 gv11b_userd_gp_get(struct gk20a *g, struct nvgpu_channel *ch)
{
struct nvgpu_mem *mem = ch->userd_mem;
u32 offset = ch->userd_offset / U32(sizeof(u32));
u32 ret;

return nvgpu_mem_rd32(g, mem, offset + ram_userd_gp_get_w());
/*
* NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET is enabled when userd get
* is not getting updated by gpu anymore.
*/
if (nvgpu_is_enabled(g, (u32)NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET)) {
nvgpu_channel_update_gpfifo_get(ch);
ret = ch->gpfifo.get;
} else {
ret = nvgpu_mem_rd32(g, mem, offset + ram_userd_gp_get_w());
}

return ret;
}

u64 gv11b_userd_pb_get(struct gk20a *g, struct nvgpu_channel *ch)
@@ -387,6 +387,9 @@ struct nvgpu_channel {
struct gpfifo_desc gpfifo;
struct priv_cmd_queue *priv_cmd_q;
struct nvgpu_channel_sync *sync;
struct nvgpu_channel_sync *gpfifo_sync;
/* lock for gpfifo hw_sema access */
struct nvgpu_mutex gpfifo_hw_sema_lock;
/* for job cleanup handling in the background worker */
struct nvgpu_list_node worker_item;
#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
@@ -2,7 +2,7 @@
*
* Nvgpu Channel Synchronization Abstraction (Semaphore)
*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),

@@ -28,11 +28,18 @@

#include <nvgpu/types.h>
#include <nvgpu/channel_sync.h>
#include "../../common/sync/channel_sync_priv.h"

#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT

struct nvgpu_channel;
struct nvgpu_channel_sync_semaphore;
struct nvgpu_channel_sync;

struct nvgpu_channel_sync_semaphore {
struct nvgpu_channel_sync base;
struct nvgpu_channel *c;
struct nvgpu_hw_semaphore *hw_sema;
};

/*
* Converts a valid struct nvgpu_channel_sync ptr to
@@ -54,6 +61,14 @@ nvgpu_channel_sync_semaphore_hw_sema(
*/
struct nvgpu_channel_sync *
nvgpu_channel_sync_semaphore_create(struct nvgpu_channel *c);
void nvgpu_channel_sync_hw_semaphore_init(struct nvgpu_channel_sync *sync);
void nvgpu_channel_update_gpfifo_get(struct nvgpu_channel *c);
s32 nvgpu_submit_create_gpfifo_tracking_semaphore(
struct nvgpu_channel_sync *s,
struct nvgpu_semaphore **semaphore,
struct priv_cmd_entry **incr_cmd,
u32 gpfifo_entries);

#endif
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),

@@ -37,8 +37,10 @@ struct nvgpu_channel_job {
struct nvgpu_mapped_buf **mapped_buffers;
u32 num_mapped_buffers;
struct nvgpu_fence_type post_fence;
struct nvgpu_semaphore *gpfifo_sema;
struct priv_cmd_entry *wait_cmd;
struct priv_cmd_entry *incr_cmd;
struct priv_cmd_entry *gpfifo_incr_cmd;
struct nvgpu_list_node list;
};
@@ -35,6 +35,7 @@ struct nvgpu_hw_semaphore;
struct nvgpu_semaphore;
struct vm_gk20a;
struct nvgpu_allocator;
struct nvgpu_channel;

#define gpu_sema_dbg(g, fmt, args...) \
nvgpu_log(g, gpu_dbg_sema, fmt, ##args)

@@ -77,6 +78,7 @@ u64 nvgpu_hw_semaphore_addr(struct nvgpu_hw_semaphore *hw_sema);
u32 nvgpu_hw_semaphore_read(struct nvgpu_hw_semaphore *hw_sema);
bool nvgpu_hw_semaphore_reset(struct nvgpu_hw_semaphore *hw_sema);
void nvgpu_hw_semaphore_set(struct nvgpu_hw_semaphore *hw_sema, u32 val);
void nvgpu_hw_semaphore_init_next(struct nvgpu_hw_semaphore *hw_sema);
int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema);
int nvgpu_hw_semaphore_update_next(struct nvgpu_hw_semaphore *hw_sema);
@@ -99,6 +101,9 @@ bool nvgpu_semaphore_can_wait(struct nvgpu_semaphore *s);

void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
struct nvgpu_hw_semaphore *hw_sema);
void nvgpu_semaphore_prepare_for_gpfifo_get(struct nvgpu_channel *c,
struct nvgpu_semaphore *s,
struct nvgpu_hw_semaphore *hw_sema, u32 new_entries);
u64 nvgpu_semaphore_get_hw_pool_page_idx(struct nvgpu_semaphore *s);

#endif /* NVGPU_SEMAPHORE_H */