mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: use job count for priv cmdbuf size
Reduce the priv cmdbuf allocation size to match the actual space needed in the worst case when num_in_flight is not specified. Although synchronization may indeed take up to 2/3 of the gpfifo entries, the number of jobs is what matters and it will be the remaining 1/3. Each job uses up at most one wait and incr command from the pre and post fences, so half of the 2/3 will be only wait commands and the other half will be only incr commands.

Jira NVGPU-4548

Change-Id: Ib3566a76b97d8f65538d961efb97408ef23ec281
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2325233
(cherry picked from commit 515deae4f58fedc7d004988f0f85470a7a894ddf)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2328413
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
Alex Waterman
parent
116c385089
commit
d58d6ff321
@@ -49,7 +49,6 @@ int nvgpu_alloc_priv_cmdbuf_queue(struct nvgpu_channel *ch,
 	u64 size, tmp_size;
 	int err = 0;
 	u32 wait_size, incr_size;
-	bool gpfifo_based = false;
 
 	/*
 	 * sema size is at least as much as syncpt size, but semas may not be
@@ -64,10 +63,6 @@ int nvgpu_alloc_priv_cmdbuf_queue(struct nvgpu_channel *ch,
 	wait_size = g->ops.sync.syncpt.get_wait_cmd_size();
 	incr_size = g->ops.sync.syncpt.get_incr_cmd_size(true);
 #endif
-	if (num_in_flight == 0U) {
-		num_in_flight = ch->gpfifo.entry_num;
-		gpfifo_based = true;
-	}
 
 	/*
 	 * Compute the amount of priv_cmdbuf space we need. In general the
@@ -82,23 +77,28 @@ int nvgpu_alloc_priv_cmdbuf_queue(struct nvgpu_channel *ch,
 	 * 8 and 10 as examples.
 	 *
 	 * We have two cases to consider: the first is we base the size of the
-	 * priv_cmd_buf on the gpfifo count. Here we multiply by a factor of
-	 * 2/3rds because only at most 2/3rds of the GPFIFO can be used for
-	 * sync commands:
+	 * queue on the gpfifo count. Here we multiply by a factor of 1/3
+	 * because at most a third of the GPFIFO entries can be used for
+	 * user-submitted jobs; another third goes to wait entries, and the
+	 * final third to incr entries. There will be one pair of acq and incr
+	 * commands for each job.
 	 *
-	 *   nr_gpfifos * (2 / 3) * (8 + 10) * 4 bytes
+	 *   gpfifo entry num * (1 / 3) * (8 + 10) * 4 bytes
 	 *
 	 * If instead num_in_flight is specified then we will use that to size
-	 * the priv_cmd_buf. The worst case is both sync commands (one ACQ and
-	 * one INCR) per submit so we have a priv_cmd_buf size of:
+	 * the queue instead of a third of the gpfifo entry count. The worst
+	 * case is still both sync commands (one ACQ and one INCR) per submit so
+	 * we have a queue size of:
 	 *
 	 *   num_in_flight * (8 + 10) * 4 bytes
 	 */
-	size = num_in_flight * (wait_size + incr_size) * sizeof(u32);
-	if (gpfifo_based) {
-		size = 2U * size / 3U;
+	if (num_in_flight == 0U) {
+		/* round down to ensure space for all priv cmds */
+		num_in_flight = ch->gpfifo.entry_num / 3;
 	}
 
+	size = num_in_flight * (wait_size + incr_size) * sizeof(u32);
+
 	tmp_size = PAGE_ALIGN(roundup_pow_of_two(size));
 	nvgpu_assert(tmp_size <= U32_MAX);
 	size = (u32)tmp_size;
Reference in New Issue
Block a user