From ad320f60b93dc70c732d9222cf572ead3d830584 Mon Sep 17 00:00:00 2001
From: Ramalingam C
Date: Wed, 3 May 2023 19:09:57 +0000
Subject: [PATCH] gpu: nvgpu: sema based gpfifo submission tracking

Implement a hw semaphore which is used to track gpfifo submission. This
implementation is used when userd.gp_get() is not defined and the
feature flag NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET is set.

At the end of each submitted job, submit a semaphore release that
writes the gpfifo get pointer to the hw semaphore address. When the
next job submission is processed, gpfifo.get is read back from that
designated hw semaphore location.

JIRA NVGPU-9588

Change-Id: Ic88ace1a3f60e3f38f159e1861464ebcaea04469
Signed-off-by: Ramalingam C
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2898143
Reviewed-by: svc-mobile-coverity
Reviewed-by: Sagar Kamble
Reviewed-by: Martin Radev
Reviewed-by: Ankur Kishore
Tested-by: Martin Radev
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/common/fifo/channel.c       | 54 ++++++++++-
 drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c   | 34 ++++---
 drivers/gpu/nvgpu/common/fifo/submit.c        | 69 ++++++++++++--
 .../gpu/nvgpu/common/semaphore/semaphore.c    | 25 ++++-
 .../gpu/nvgpu/common/semaphore/semaphore_hw.c |  5 +
 .../common/sync/channel_sync_semaphore.c      | 91 +++++++++++++++++--
 drivers/gpu/nvgpu/hal/fifo/userd_gv11b.c      | 15 ++-
 drivers/gpu/nvgpu/include/nvgpu/channel.h     |  3 +
 .../include/nvgpu/channel_sync_semaphore.h    | 19 +++-
 drivers/gpu/nvgpu/include/nvgpu/job.h         |  4 +-
 drivers/gpu/nvgpu/include/nvgpu/semaphore.h   |  5 +
 11 files changed, 286 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index d01b66daf..70d2d4af7 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -251,6 +251,10 @@ static void channel_kernelmode_deinit(struct nvgpu_channel *ch)
 		nvgpu_channel_sync_destroy(ch->sync);
 		ch->sync = NULL;
 	}
+	if (ch->gpfifo_sync != NULL) {
+		nvgpu_channel_sync_destroy(ch->gpfifo_sync);
+		ch->gpfifo_sync = NULL;
+	}
 	nvgpu_mutex_release(&ch->sync_lock);
 }
@@ -370,6 +374,18 @@ static int channel_setup_kernelmode(struct nvgpu_channel *c,
 		nvgpu_mutex_release(&c->sync_lock);
 		goto clean_up_unmap;
 	}
+
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET)) {
+		c->gpfifo_sync = nvgpu_channel_sync_semaphore_create(c);
+		if (c->gpfifo_sync == NULL) {
+			err = -ENOMEM;
+			goto clean_up_sync;
+		}
+		nvgpu_mutex_acquire(&c->gpfifo_hw_sema_lock);
+		nvgpu_channel_sync_hw_semaphore_init(c->gpfifo_sync);
+		nvgpu_mutex_release(&c->gpfifo_hw_sema_lock);
+	}
+
 	nvgpu_mutex_release(&c->sync_lock);

 	if (g->ops.channel.set_syncpt != NULL) {
@@ -431,6 +447,10 @@ clean_up_priv_cmd:
clean_up_prealloc:
 	nvgpu_channel_joblist_deinit(c);
clean_up_sync:
+	if (c->gpfifo_sync != NULL) {
+		nvgpu_channel_sync_destroy(c->gpfifo_sync);
+		c->gpfifo_sync = NULL;
+	}
 	if (c->sync != NULL) {
 		nvgpu_channel_sync_destroy(c->sync);
 		c->sync = NULL;
@@ -448,9 +468,9 @@ clean_up:
 }

 /* Update with this periodically to determine how the gpfifo is draining.
*/ -static inline u32 channel_update_gpfifo_get(struct gk20a *g, - struct nvgpu_channel *c) +static inline u32 channel_update_gpfifo_get(struct nvgpu_channel *c) { + struct gk20a *g = c->g; u32 new_get = 0U; if (g->ops.userd.gp_get != NULL) { @@ -469,7 +489,7 @@ u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch) u32 nvgpu_channel_update_gpfifo_get_and_get_free_count(struct nvgpu_channel *ch) { - (void)channel_update_gpfifo_get(ch->g, ch); + (void)channel_update_gpfifo_get(ch); return nvgpu_channel_get_gpfifo_free_count(ch); } @@ -514,6 +534,9 @@ static void nvgpu_channel_finalize_job(struct nvgpu_channel *c, * semaphore or even a syncfd. */ nvgpu_fence_put(&job->post_fence); + if (job->gpfifo_sema != NULL) { + nvgpu_semaphore_put(job->gpfifo_sema); + } /* * Free the private command buffers (in order of allocation) @@ -522,6 +545,9 @@ static void nvgpu_channel_finalize_job(struct nvgpu_channel *c, nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->wait_cmd); } nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->incr_cmd); + if (job->gpfifo_incr_cmd != NULL) { + nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->gpfifo_incr_cmd); + } nvgpu_channel_free_job(c, job); @@ -590,9 +616,22 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c) WARN_ON(c->sync == NULL); + if (c->gpfifo_sync != NULL) { + if (g->aggressive_sync_destroy_thresh != 0U) { + nvgpu_mutex_acquire(&c->sync_lock); + if (nvgpu_channel_sync_put_ref_and_check(c->gpfifo_sync) + && g->aggressive_sync_destroy) { + nvgpu_channel_sync_destroy(c->gpfifo_sync); + c->gpfifo_sync = NULL; + } + nvgpu_mutex_release(&c->sync_lock); + } + } + if (c->sync != NULL) { if (c->has_os_fence_framework_support && - g->os_channel.os_fence_framework_inst_exists(c)) { + g->os_channel.os_fence_framework_inst_exists(c) && + !nvgpu_has_syncpoints(g)) { g->os_channel.signal_os_fence_framework(c, &job->post_fence); } @@ -689,7 +728,7 @@ bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch, goto done; } - gpfifo_get = channel_update_gpfifo_get(ch->g, ch); + gpfifo_get = channel_update_gpfifo_get(ch); if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) { /* didn't advance since previous ctxsw timeout check */ @@ -1042,6 +1081,9 @@ unbind: #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT WARN_ON(ch->sync != NULL); + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET)) { + WARN_ON(ch->gpfifo_sync != NULL); + } #endif channel_free_unlink_debug_session(ch); @@ -1751,6 +1793,7 @@ static void nvgpu_channel_destroy(struct nvgpu_channel *c) nvgpu_mutex_destroy(&c->ioctl_lock); #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); + nvgpu_mutex_destroy(&c->gpfifo_hw_sema_lock); #endif nvgpu_mutex_destroy(&c->sync_lock); #if defined(CONFIG_NVGPU_CYCLESTATS) @@ -1815,6 +1858,7 @@ int nvgpu_channel_init_support(struct gk20a *g, u32 chid) nvgpu_init_list_node(&c->worker_item); nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); + nvgpu_mutex_init(&c->gpfifo_hw_sema_lock); #endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ nvgpu_mutex_init(&c->ioctl_lock); diff --git a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c index 335bfade4..b9922c726 100644 --- a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c +++ b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c @@ -67,6 +67,7 @@ int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm, int err = 0; u32 wait_size, incr_size; u32 mem_per_job; + u32 gpfifo_incr_size = 0; /* * sema size is at least as much as syncpt size, but semas may not be @@ -77,6 +78,9 
@@ int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
 #ifdef CONFIG_NVGPU_SW_SEMAPHORE
 	wait_size = g->ops.sync.sema.get_wait_cmd_size();
 	incr_size = g->ops.sync.sema.get_incr_cmd_size();
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET)) {
+		gpfifo_incr_size = g->ops.sync.sema.get_incr_cmd_size();
+	}
 #else
 	wait_size = g->ops.sync.syncpt.get_wait_cmd_size();
 	incr_size = g->ops.sync.syncpt.get_incr_cmd_size(true);
@@ -84,22 +88,24 @@ int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,

 	/*
 	 * Compute the amount of priv_cmdbuf space we need. In general the
-	 * worst case is the kernel inserts both a semaphore pre-fence and
-	 * post-fence. Any sync-pt fences will take less memory so we can
-	 * ignore them unless they're the only supported type. Jobs can also
-	 * have more than one pre-fence but that's abnormal and we'll -EAGAIN
-	 * if such jobs would fill the queue.
+	 * worst case is the kernel inserts a semaphore pre-fence, a
+	 * post-fence, and a semaphore for gp.get tracking. Any sync-pt fences
+	 * will take less memory so we can ignore them unless they're the only
+	 * supported type. Jobs can also have more than one pre-fence but
+	 * that's abnormal and we'll -EAGAIN if such jobs would fill the queue.
 	 *
 	 * A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
 	 * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be
 	 * 10 words: all the same as an ACQ plus a non-stalling intr which is
-	 * another 2 words. In reality these numbers vary by chip but we'll use
-	 * 8 and 10 as examples.
+	 * another 2 words. The semaphore for updating gp.get needs the same
+	 * amount as a semaphore INCR (fence-get), i.e. 10 words. In reality
+	 * these numbers vary by chip but we'll use 8, 10 and 10 as examples.
 	 *
 	 * Given the job count, cmdbuf space is allocated such that each job
-	 * can get one wait command and one increment command:
+	 * can get one wait command, one increment command, and a semaphore
+	 * for gp.get tracking:
 	 *
-	 *   job_count * (8 + 10) * 4 bytes
+	 *   job_count * (8 + 10 + 10) * 4 bytes
 	 *
 	 * These cmdbufs are inserted as gpfifo entries right before and after
 	 * the user submitted gpfifo entries per submit.
@@ -109,13 +115,15 @@ int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
 	 * is full when the number of consumed entries is one less than the
 	 * allocation size:
 	 *
-	 *   alloc bytes = job_count * (wait + incr + 1) * slot in bytes
+	 *   alloc bytes = job_count * (wait + incr + gpfifo_incr + 1) * slot
+	 *   in bytes
 	 */
-	mem_per_job = nvgpu_safe_mult_u32(
+	mem_per_job = nvgpu_safe_add_u32(
 			nvgpu_safe_add_u32(
 				nvgpu_safe_add_u32(wait_size, incr_size),
-				1U),
-			(u32)sizeof(u32));
+				gpfifo_incr_size), 1U);
+	mem_per_job = nvgpu_safe_mult_u32(mem_per_job, (u32)sizeof(u32));
+
 	/* both 32 bit and mem_per_job is small */
 	size = nvgpu_safe_mult_u64((u64)job_count, (u64)mem_per_job);

diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index ee16f4777..3279d7876 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -106,7 +107,7 @@ static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c, static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, struct nvgpu_channel_fence *fence, struct nvgpu_channel_job *job, - u32 flags) + u32 flags, u32 gpfifo_entries) { struct gk20a *g = c->g; bool need_sync_fence; @@ -116,6 +117,8 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U; bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U; bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U; + bool sema_tracking = nvgpu_is_enabled(g, + NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET); if (g->aggressive_sync_destroy_thresh != 0U) { nvgpu_mutex_acquire(&c->sync_lock); @@ -128,6 +131,20 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, new_sync_created = true; } nvgpu_channel_sync_get_ref(c->sync); + + if (c->gpfifo_sync == NULL && sema_tracking) { + c->gpfifo_sync = nvgpu_channel_sync_semaphore_create(c); + if (c->gpfifo_sync == NULL) { + err = -ENOMEM; + goto clean_up_put_sync; + } + nvgpu_mutex_acquire(&c->gpfifo_hw_sema_lock); + nvgpu_channel_sync_hw_semaphore_init(c->gpfifo_sync); + nvgpu_mutex_release(&c->gpfifo_hw_sema_lock); + } + if (c->gpfifo_sync != NULL) { + nvgpu_channel_sync_get_ref(c->gpfifo_sync); + } } if ((g->ops.channel.set_syncpt != NULL) && new_sync_created) { @@ -151,6 +168,7 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, need_sync_fence = flag_fence_get && flag_sync_fence; + /* * Always generate an increment at the end of a GPFIFO submission. When * we do job tracking, post fences are needed for various reasons even @@ -162,19 +180,41 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, goto clean_up_wait_cmd; } + if (sema_tracking) { + err = nvgpu_submit_create_gpfifo_tracking_semaphore( + c->gpfifo_sync, &job->gpfifo_sema, + &job->gpfifo_incr_cmd, + nvgpu_safe_add_u32(gpfifo_entries, + (flag_fence_wait ? 
3U : 2U))); + if (err != 0) { + goto clean_up_incr_cmd; + } + } + if (g->aggressive_sync_destroy_thresh != 0U) { nvgpu_mutex_release(&c->sync_lock); } return 0; +clean_up_incr_cmd: + if (job->incr_cmd != NULL) { + nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->incr_cmd); + job->incr_cmd = NULL; + } clean_up_wait_cmd: if (job->wait_cmd != NULL) { nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd); + job->wait_cmd = NULL; } - job->wait_cmd = NULL; clean_up_put_sync: if (g->aggressive_sync_destroy_thresh != 0U) { - if (nvgpu_channel_sync_put_ref_and_check(c->sync) + if (c->gpfifo_sync != NULL && + nvgpu_channel_sync_put_ref_and_check(c->gpfifo_sync) + && g->aggressive_sync_destroy) { + nvgpu_channel_sync_destroy(c->gpfifo_sync); + } + if (c->sync != NULL && + nvgpu_channel_sync_put_ref_and_check(c->sync) && g->aggressive_sync_destroy) { nvgpu_channel_sync_destroy(c->sync); } @@ -349,7 +389,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c, return err; } - err = nvgpu_submit_prepare_syncs(c, fence, job, flags); + err = nvgpu_submit_prepare_syncs(c, fence, job, flags, num_entries); if (err != 0) { goto clean_up_job; } @@ -369,9 +409,10 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c, if (err != 0) { goto clean_up_gpfifo_wait; } - nvgpu_submit_append_priv_cmdbuf(c, job->incr_cmd); - + if (c->gpfifo_sync != NULL) { + nvgpu_submit_append_priv_cmdbuf(c, job->gpfifo_incr_cmd); + } err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting); if (err != 0) { goto clean_up_gpfifo_incr; @@ -403,6 +444,17 @@ clean_up_gpfifo_incr: nvgpu_safe_sub_u32(c->gpfifo.entry_num, nvgpu_safe_add_u32(1U, num_entries)))) & nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U); + + /* + * undo the gpfifo incr priv cmdbuf which is similar to undo of + * wait_cmd priv cmdbuf. + */ + if (job->gpfifo_incr_cmd != NULL) { + c->gpfifo.put = + nvgpu_safe_add_u32(c->gpfifo.put, + nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U)) & + nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U); + } clean_up_gpfifo_wait: if (job->wait_cmd != NULL) { /* @@ -419,6 +471,9 @@ clean_up_gpfifo_wait: } nvgpu_fence_put(&job->post_fence); nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->incr_cmd); + if (job->gpfifo_incr_cmd != NULL) { + nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->gpfifo_incr_cmd); + } if (job->wait_cmd != NULL) { nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd); } diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore.c b/drivers/gpu/nvgpu/common/semaphore/semaphore.c index e0b349c5a..c8f143eed 100644 --- a/drivers/gpu/nvgpu/common/semaphore/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore.c @@ -1,7 +1,7 @@ /* * Nvgpu Semaphores * - * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -168,8 +168,29 @@ void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, hw_sema->chid, next); } +void nvgpu_semaphore_prepare_for_gpfifo_get(struct nvgpu_channel *c, + struct nvgpu_semaphore *s, struct nvgpu_hw_semaphore *hw_sema, + u32 new_entries) +{ + u32 next_get; + + nvgpu_mutex_acquire(&c->gpfifo_hw_sema_lock); + next_get = nvgpu_safe_add_u32((u32)nvgpu_hw_semaphore_read_next(hw_sema), + new_entries) & nvgpu_safe_sub_u32(c->gpfifo.entry_num, + 1U); + nvgpu_atomic_set(&hw_sema->next_value, (s32)next_get); + nvgpu_mutex_release(&c->gpfifo_hw_sema_lock); + + WARN_ON(s->ready_to_wait); + + nvgpu_atomic_set(&s->value, (s32)next_get); + s->ready_to_wait = true; + + gpu_sema_verbose_dbg(s->g, "PREP sema for c=%d (%u)", + hw_sema->chid, next_get); +} + u64 nvgpu_semaphore_get_hw_pool_page_idx(struct nvgpu_semaphore *s) { return nvgpu_semaphore_pool_get_page_idx(s->location.pool); } - diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c index 43e88266e..accc21340 100644 --- a/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c @@ -159,6 +159,11 @@ void nvgpu_hw_semaphore_set(struct nvgpu_hw_semaphore *hw_sema, u32 val) nvgpu_mem_wr(g, &pool->rw_mem, hw_sema->location.offset, val); } +void nvgpu_hw_semaphore_init_next(struct nvgpu_hw_semaphore *hw_sema) +{ + nvgpu_atomic_set(&hw_sema->next_value, 0); +} + int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema) { return nvgpu_atomic_read(&hw_sema->next_value); diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c index fcc5fa02f..16e2e4037 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c @@ -1,7 +1,7 @@ /* * GK20A Channel Synchronization Abstraction * - * Copyright (c) 2014-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -41,12 +41,6 @@ #include "channel_sync_priv.h" -struct nvgpu_channel_sync_semaphore { - struct nvgpu_channel_sync base; - struct nvgpu_channel *c; - struct nvgpu_hw_semaphore *hw_sema; -}; - static struct nvgpu_channel_sync_semaphore * nvgpu_channel_sync_semaphore_from_base(struct nvgpu_channel_sync *base) { @@ -113,6 +107,34 @@ static void add_sema_incr_cmd(struct gk20a *g, struct nvgpu_channel *c, va, cmd); } +static void add_sema_incr_cmd_to_write_next_get(struct nvgpu_channel *c, + struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd, + struct nvgpu_hw_semaphore *hw_sema, + u32 entries) +{ + struct gk20a *g = c->g; + u32 ch = c->chid; + u64 va; + + /* release will need to write back to the semaphore memory. */ + va = nvgpu_semaphore_gpu_rw_va(s); + + /* find the right sema next_value to write (like syncpt's max). */ + nvgpu_semaphore_prepare_for_gpfifo_get(c, s, hw_sema, entries); + + /* + * gp.get should be updated only when all the cmds are completed. + * Hence forcing the wfi to be true always. 
+	 */
+	g->ops.sync.sema.add_incr_cmd(g, cmd, s, va, true);
+
+	gpu_sema_verbose_dbg(g, "(R) c=%u INCR %u (%u) pool=%-3llu "
+			"va=0x%llx entry=%p",
+			ch, nvgpu_semaphore_get_value(s),
+			nvgpu_semaphore_read(s),
+			nvgpu_semaphore_get_hw_pool_page_idx(s),
+			va, cmd);
+}
+
 static int channel_sync_semaphore_wait_fd(
 		struct nvgpu_channel_sync *s, int fd,
 		struct priv_cmd_entry **entry, u32 max_wait_cmds)
@@ -228,6 +250,42 @@ clean_up_sema:
 	return err;
 }

+s32 nvgpu_submit_create_gpfifo_tracking_semaphore(
+		struct nvgpu_channel_sync *s,
+		struct nvgpu_semaphore **semaphore,
+		struct priv_cmd_entry **incr_cmd,
+		u32 gpfifo_entries)
+{
+	u32 incr_cmd_size;
+	struct nvgpu_channel_sync_semaphore *sp =
+		nvgpu_channel_sync_semaphore_from_base(s);
+	struct nvgpu_channel *c = sp->c;
+	s32 err = 0;
+
+	*semaphore = nvgpu_semaphore_alloc(sp->hw_sema);
+	if (*semaphore == NULL) {
+		nvgpu_err(c->g,
+			"ran out of semaphores");
+		return -ENOMEM;
+	}
+
+	incr_cmd_size = c->g->ops.sync.sema.get_incr_cmd_size();
+	err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q, incr_cmd_size, incr_cmd);
+	if (err != 0) {
+		goto clean_up_sema;
+	}
+
+	/* Release the completion semaphore. */
+	add_sema_incr_cmd_to_write_next_get(c, *semaphore, *incr_cmd,
+			sp->hw_sema, gpfifo_entries);
+
+	return 0;
+
+clean_up_sema:
+	nvgpu_semaphore_put(*semaphore);
+	return err;
+}
+
 static int channel_sync_semaphore_incr(
 		struct nvgpu_channel_sync *s,
 		struct priv_cmd_entry **entry,
@@ -396,3 +454,22 @@ err_free_sema:
 	nvgpu_kfree(g, sema);
 	return NULL;
 }
+
+void nvgpu_channel_sync_hw_semaphore_init(struct nvgpu_channel_sync *sync)
+{
+	struct nvgpu_channel_sync_semaphore *sp =
+		nvgpu_channel_sync_semaphore_from_base(sync);
+
+	nvgpu_hw_semaphore_set(sp->hw_sema, 0);
+	nvgpu_hw_semaphore_init_next(sp->hw_sema);
+}
+
+void nvgpu_channel_update_gpfifo_get(struct nvgpu_channel *c)
+{
+	struct nvgpu_channel_sync_semaphore *sp;
+
+	if (c->gpfifo_sync != NULL) {
+		sp = nvgpu_channel_sync_semaphore_from_base(c->gpfifo_sync);
+		c->gpfifo.get = nvgpu_hw_semaphore_read(sp->hw_sema);
+	}
+}
diff --git a/drivers/gpu/nvgpu/hal/fifo/userd_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/userd_gv11b.c
index fed7c91fb..5753466f4 100644
--- a/drivers/gpu/nvgpu/hal/fifo/userd_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/fifo/userd_gv11b.c
@@ -28,6 +28,7 @@
 #include
 #include
+#include

 #include "userd_gv11b.h"

@@ -35,8 +36,20 @@ u32 gv11b_userd_gp_get(struct gk20a *g, struct nvgpu_channel *ch)
 {
 	struct nvgpu_mem *mem = ch->userd_mem;
 	u32 offset = ch->userd_offset / U32(sizeof(u32));
+	u32 ret;

-	return nvgpu_mem_rd32(g, mem, offset + ram_userd_gp_get_w());
+	/*
+	 * NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET is enabled when the userd get
+	 * pointer is no longer updated by the GPU.
+ */ + if (nvgpu_is_enabled(g, (u32)NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET)) { + nvgpu_channel_update_gpfifo_get(ch); + ret = ch->gpfifo.get; + } else { + ret = nvgpu_mem_rd32(g, mem, offset + ram_userd_gp_get_w()); + } + + return ret; } u64 gv11b_userd_pb_get(struct gk20a *g, struct nvgpu_channel *ch) diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 158f33734..0f0d65790 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -387,6 +387,9 @@ struct nvgpu_channel { struct gpfifo_desc gpfifo; struct priv_cmd_queue *priv_cmd_q; struct nvgpu_channel_sync *sync; + struct nvgpu_channel_sync *gpfifo_sync; + /* lock for gpfifo hw_sema access */ + struct nvgpu_mutex gpfifo_hw_sema_lock; /* for job cleanup handling in the background worker */ struct nvgpu_list_node worker_item; #endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h index 72da85ecc..31c16db2f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h @@ -2,7 +2,7 @@ * * Nvgpu Channel Synchronization Abstraction (Semaphore) * - * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,11 +28,18 @@ #include #include +#include "../../common/sync/channel_sync_priv.h" #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT struct nvgpu_channel; -struct nvgpu_channel_sync_semaphore; +struct nvgpu_channel_sync; + +struct nvgpu_channel_sync_semaphore { + struct nvgpu_channel_sync base; + struct nvgpu_channel *c; + struct nvgpu_hw_semaphore *hw_sema; +}; /* * Converts a valid struct nvgpu_channel_sync ptr to @@ -54,6 +61,14 @@ nvgpu_channel_sync_semaphore_hw_sema( */ struct nvgpu_channel_sync * nvgpu_channel_sync_semaphore_create(struct nvgpu_channel *c); +void nvgpu_channel_sync_hw_semaphore_init(struct nvgpu_channel_sync *sync); +void nvgpu_channel_update_gpfifo_get(struct nvgpu_channel *c); +s32 nvgpu_submit_create_gpfifo_tracking_semaphore( + struct nvgpu_channel_sync *s, + struct nvgpu_semaphore **semaphore, + struct priv_cmd_entry **incr_cmd, + u32 gpfifo_entries); + #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/job.h b/drivers/gpu/nvgpu/include/nvgpu/job.h index 68d77ca22..9c2c9f6f4 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/job.h +++ b/drivers/gpu/nvgpu/include/nvgpu/job.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -37,8 +37,10 @@ struct nvgpu_channel_job { struct nvgpu_mapped_buf **mapped_buffers; u32 num_mapped_buffers; struct nvgpu_fence_type post_fence; + struct nvgpu_semaphore *gpfifo_sema; struct priv_cmd_entry *wait_cmd; struct priv_cmd_entry *incr_cmd; + struct priv_cmd_entry *gpfifo_incr_cmd; struct nvgpu_list_node list; }; diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index 05c5353b5..86cb4f594 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h @@ -35,6 +35,7 @@ struct nvgpu_hw_semaphore; struct nvgpu_semaphore; struct vm_gk20a; struct nvgpu_allocator; +struct nvgpu_channel; #define gpu_sema_dbg(g, fmt, args...) \ nvgpu_log(g, gpu_dbg_sema, fmt, ##args) @@ -77,6 +78,7 @@ u64 nvgpu_hw_semaphore_addr(struct nvgpu_hw_semaphore *hw_sema); u32 nvgpu_hw_semaphore_read(struct nvgpu_hw_semaphore *hw_sema); bool nvgpu_hw_semaphore_reset(struct nvgpu_hw_semaphore *hw_sema); void nvgpu_hw_semaphore_set(struct nvgpu_hw_semaphore *hw_sema, u32 val); +void nvgpu_hw_semaphore_init_next(struct nvgpu_hw_semaphore *hw_sema); int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema); int nvgpu_hw_semaphore_update_next(struct nvgpu_hw_semaphore *hw_sema); @@ -99,6 +101,9 @@ bool nvgpu_semaphore_can_wait(struct nvgpu_semaphore *s); void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, struct nvgpu_hw_semaphore *hw_sema); +void nvgpu_semaphore_prepare_for_gpfifo_get(struct nvgpu_channel *c, + struct nvgpu_semaphore *s, + struct nvgpu_hw_semaphore *hw_sema, u32 new_entries); u64 nvgpu_semaphore_get_hw_pool_page_idx(struct nvgpu_semaphore *s); #endif /* NVGPU_SEMAPHORE_H */
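
Note (illustrative, not part of the patch): the tracking flow this patch
implements can be summarized outside the driver. Below is a minimal,
self-contained C sketch; all names here (hw_sema_sim, gpfifo_sim,
prepare_gpfifo_release, ...) are hypothetical stand-ins for the real nvgpu
structures, and it assumes gpfifo.entry_num is a power of two, which the
"& (entry_num - 1)" wrap in the patch implies. In the real driver the
semaphore release is emitted into the pushbuffer and the GPU performs the
write; here the GPU side is simulated by completing the job in order.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GPFIFO_ENTRY_NUM 64u /* must be a power of two for the & mask */

/* Hypothetical stand-in for the hw semaphore backing memory. */
struct hw_sema_sim {
	uint32_t value;      /* what the "GPU" has written (completed gp_get) */
	uint32_t next_value; /* CPU-side bookkeeping of the next release value */
};

struct gpfifo_sim {
	uint32_t get; /* consumed by GPU, tracked via the semaphore */
	uint32_t put; /* advanced by CPU on submit */
};

/*
 * On submit: compute the gpfifo get value that will be valid once this
 * job's entries are consumed, and remember it as the semaphore payload.
 * Mirrors nvgpu_semaphore_prepare_for_gpfifo_get(): next release value =
 * (previous next + new entries) & (entry_num - 1).
 */
static uint32_t prepare_gpfifo_release(struct hw_sema_sim *sema,
				       uint32_t new_entries)
{
	sema->next_value = (sema->next_value + new_entries) &
			   (GPFIFO_ENTRY_NUM - 1u);
	return sema->next_value; /* payload of the emitted semaphore INCR */
}

/* "GPU side": completing the job writes the payload to the semaphore. */
static void gpu_complete_job(struct hw_sema_sim *sema, uint32_t payload)
{
	sema->value = payload;
}

/*
 * On the next submit (or ctxsw-timeout check): refresh gpfifo.get from the
 * semaphore instead of USERD, mirroring nvgpu_channel_update_gpfifo_get().
 */
static void update_gpfifo_get(struct gpfifo_sim *gpfifo,
			      const struct hw_sema_sim *sema)
{
	gpfifo->get = sema->value;
}

int main(void)
{
	struct hw_sema_sim sema = { 0, 0 };
	struct gpfifo_sim gpfifo = { 0, 0 };

	/* One job: 5 user entries plus wait/incr/gpfifo-incr priv entries. */
	uint32_t entries = 5u + 3u; /* wait present: +3, matching the patch */
	uint32_t payload = prepare_gpfifo_release(&sema, entries);

	gpfifo.put = (gpfifo.put + entries) & (GPFIFO_ENTRY_NUM - 1u);
	gpu_complete_job(&sema, payload); /* GPU consumes the entries */

	update_gpfifo_get(&gpfifo, &sema);
	assert(gpfifo.get == gpfifo.put); /* fifo fully drained */
	printf("gpfifo get=%u put=%u\n", (unsigned)gpfifo.get,
	       (unsigned)gpfifo.put);
	return 0;
}

The invariant this relies on: once the GPU has executed the job's final
semaphore INCR, the semaphore payload equals the gpfifo get pointer that
USERD would have reported, so gv11b_userd_gp_get() can serve reads from
the semaphore location instead of USERD.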
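
Similarly, the priv cmdbuf sizing math from nvgpu_priv_cmdbuf_queue_alloc()
can be sanity-checked in isolation. A small sketch using the example word
counts from the comment (8, 10, 10), a hypothetical job_count of 128, and
plain arithmetic in place of nvgpu's safe-math helpers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Example word counts from the comment; real values are per-chip. */
	uint32_t wait_size = 8u;         /* semaphore ACQ (fence-wait) */
	uint32_t incr_size = 10u;        /* semaphore INCR (post-fence) */
	uint32_t gpfifo_incr_size = 10u; /* semaphore INCR for gp.get */
	uint32_t job_count = 128u;       /* hypothetical queue depth */

	/* mem_per_job = (wait + incr + gpfifo_incr + 1) * sizeof(u32) */
	uint32_t mem_per_job = (wait_size + incr_size + gpfifo_incr_size + 1u)
			       * (uint32_t)sizeof(uint32_t);
	uint64_t size = (uint64_t)job_count * mem_per_job;

	/* With the 8 + 10 + 10 example words: (29 * 4) = 116 bytes per job. */
	printf("mem_per_job=%u bytes, total=%llu bytes\n",
	       (unsigned)mem_per_job, (unsigned long long)size);
	return 0;
}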