mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 02:22:34 +03:00
gpu: nvgpu: alloc priv cmdbuf based on chip
The semaphore wait and incr sizes are not 8 and 10 for gv11b onwards. Use the specific HAL API to retrieve their sizes and compute the priv cmdbuf queue based on them instead of the up-to-gp10b values. We haven't run out of space likely for several reasons: 1) userspace may not need both pre and post fences for absolutely each submitted job 2) submitted jobs may consist of more than one gpfifo entry, reducing the relative required sync capacity 3) the queue size is rounded up to the next power of two which leaves some margin for error in this calculation 4) the gpfifo size based num-in-flight guess has been twice as big as it needs to be (fixed in a next patch) Jira NVGPU-4548 Change-Id: I172b5c0d8bb7d2231cc45cbed5e1e8b60ce7c707 Signed-off-by: Konsta Hölttä <kholtta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2323148 (cherry picked from commit 03fb194d105242c3eb20a9857a22743f5f64b9b9) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2328412 Reviewed-by: automaticguardword <automaticguardword@nvidia.com> Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Alex Waterman <alexw@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
Alex Waterman
parent
00203b42f2
commit
116c385089
@@ -29,6 +29,7 @@
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/priv_cmdbuf.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
|
||||
struct priv_cmd_queue {
|
||||
struct nvgpu_mem mem;
|
||||
@@ -47,23 +48,38 @@ int nvgpu_alloc_priv_cmdbuf_queue(struct nvgpu_channel *ch,
|
||||
struct priv_cmd_queue *q;
|
||||
u64 size, tmp_size;
|
||||
int err = 0;
|
||||
u32 wait_size, incr_size;
|
||||
bool gpfifo_based = false;
|
||||
|
||||
/*
|
||||
* sema size is at least as much as syncpt size, but semas may not be
|
||||
* enabled in the build. If neither semas nor syncpts are enabled, priv
|
||||
* cmdbufs and as such kernel mode submits with job tracking won't be
|
||||
* supported.
|
||||
*/
|
||||
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
|
||||
wait_size = g->ops.sync.sema.get_wait_cmd_size();
|
||||
incr_size = g->ops.sync.sema.get_incr_cmd_size();
|
||||
#else
|
||||
wait_size = g->ops.sync.syncpt.get_wait_cmd_size();
|
||||
incr_size = g->ops.sync.syncpt.get_incr_cmd_size(true);
|
||||
#endif
|
||||
if (num_in_flight == 0U) {
|
||||
num_in_flight = ch->gpfifo.entry_num;
|
||||
gpfifo_based = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the amount of priv_cmdbuf space we need. In general the worst
|
||||
* case is the kernel inserts both a semaphore pre-fence and post-fence.
|
||||
* Any sync-pt fences will take less memory so we can ignore them for
|
||||
* now.
|
||||
* Compute the amount of priv_cmdbuf space we need. In general the
|
||||
* worst case is the kernel inserts both a semaphore pre-fence and
|
||||
* post-fence. Any sync-pt fences will take less memory so we can
|
||||
* ignore them unless they're the only supported type.
|
||||
*
|
||||
* A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
|
||||
* semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10
|
||||
* words: all the same as an ACQ plus a non-stalling intr which is
|
||||
* another 2 words.
|
||||
* semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be
|
||||
* 10 words: all the same as an ACQ plus a non-stalling intr which is
|
||||
* another 2 words. In reality these numbers vary by chip but we'll use
|
||||
* 8 and 10 as examples.
|
||||
*
|
||||
* We have two cases to consider: the first is we base the size of the
|
||||
* priv_cmd_buf on the gpfifo count. Here we multiply by a factor of
|
||||
@@ -73,12 +89,12 @@ int nvgpu_alloc_priv_cmdbuf_queue(struct nvgpu_channel *ch,
|
||||
* nr_gpfifos * (2 / 3) * (8 + 10) * 4 bytes
|
||||
*
|
||||
* If instead num_in_flight is specified then we will use that to size
|
||||
* the priv_cmd_buf. The worst case is two sync commands (one ACQ and
|
||||
* the priv_cmd_buf. The worst case is both sync commands (one ACQ and
|
||||
* one INCR) per submit so we have a priv_cmd_buf size of:
|
||||
*
|
||||
* num_in_flight * (8 + 10) * 4 bytes
|
||||
*/
|
||||
size = num_in_flight * 18UL * sizeof(u32);
|
||||
size = num_in_flight * (wait_size + incr_size) * sizeof(u32);
|
||||
if (gpfifo_based) {
|
||||
size = 2U * size / 3U;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user