gpu: host1x: Optimization for very short syncpoint waits

For tasks that execute very quickly, but not quickly enough to be
already complete by the time execution reaches host1x_syncpt_wait,
the time spent allocating a fence and invoking
dma_fence_wait_timeout dominates the time it actually takes to
execute the task.

To improve wait latency in these cases, replace the current
"is threshold already reached" check with a short spin loop
that catches these situations before falling back to the
fence-based wait. For longer waits, since this function blocks
anyway, the only negative effect is slightly increased CPU
consumption due to the loop.

Bug 4001325

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Change-Id: I44e99cda88b4bcb33f190884d1a2e5f7588cb775
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2881716
Reviewed-by: Santosh BS <santoshb@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2916412
This commit is contained in:
Mikko Perttunen
2023-04-03 13:17:46 +03:00
committed by mobile promotions
parent 1fead62f11
commit fe8568a159

View File

@@ -6,6 +6,7 @@
*/
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dma-fence.h>
#include <linux/slab.h>
@@ -240,21 +241,32 @@ int host1x_syncpt_wait_ts(struct host1x_syncpt *sp, u32 thresh, long timeout, u3
ktime_t *ts)
{
struct dma_fence *fence;
ktime_t spin_timeout, time;
long wait_err;
host1x_hw_syncpt_load(sp->host, sp);
if (value)
*value = host1x_syncpt_load(sp);
if (ts)
*ts = ktime_get();
if (host1x_syncpt_is_expired(sp, thresh))
return 0;
if (timeout < 0)
timeout = LONG_MAX;
else if (timeout == 0)
/*
* Even 1 jiffy is longer than 50us, so assume timeout is over 50us
* always except for polls (timeout=0)
*/
spin_timeout = ktime_add_us(ktime_get(), timeout > 0 ? 50 : 0);
for (;;) {
host1x_hw_syncpt_load(sp->host, sp);
time = ktime_get();
if (value)
*value = host1x_syncpt_load(sp);
if (ts)
*ts = time;
if (host1x_syncpt_is_expired(sp, thresh))
return 0;
if (ktime_compare(time, spin_timeout) > 0)
break;
udelay(5);
}
if (timeout == 0)
return -EAGAIN;
fence = host1x_fence_create(sp, thresh, false);