From 52835c39aeec34904b9e1c316fd3607277df2ae5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Konsta=20H=C3=B6ltt=C3=A4?=
Date: Wed, 1 Apr 2020 09:24:15 +0300
Subject: [PATCH] gpu: nvgpu: do not skip completed syncpt prefences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A corner case has existed since ancient times for syncpoint-backed
prefences to not cause a gpu wait if the fence is found to be completed
in the submit path. This adds some unnecessary complexity, so don't
check for completion in software. Let the gpu "wait" for these
known-to-be-trivial waits too. Necessary priv cmdbuf space has been
allocated anyway.

Originally nvhost had 16-bit fences which would wrap around relatively
quickly, so waiting for an old fence could have looked like waiting for
a fence that will expire long in the future. With 32-bit thresholds,
this hasn't been the case for several Tegra generations anymore, and
nvhost doesn't ignore waits like this either.

The wait priv cmdbuf in the submit path can still be missing even with
a prefence supplied because the Android sync framework supports sync
fds that contain zero fences inside; this can happen at least when
merging fences that have all expired. In such conditions the wait
cmdbuf wouldn't even get allocated.

[this is squashed with commit 8b3b0cb12d118 (gpu: nvgpu: allow no wait
cmd with valid input fence) from
https://git-master.nvidia.com/r/c/linux-nvgpu/+/2325677]

Jira NVGPU-4548

Change-Id: Ie81fd8735c2614d0fedb7242dc9869d0961610eb
Signed-off-by: Konsta Hölttä
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2321762
(cherry picked from commit 8f3dac44934eb727b1bf4fb853f019cf4c15a5cd)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2324254
Reviewed-by: automaticguardword
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-misra
Reviewed-by: svc-mobile-cert
Reviewed-by: Alex Waterman
Reviewed-by: mobile promotions
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/fifo/submit.c        |  5 +--
 .../nvgpu/common/sync/channel_sync_syncpt.c   | 34 +++++++------------
 2 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 162cc19b5..1c47aa5f6 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -573,8 +573,9 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 	nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);
 
 	/*
-	 * wait_cmd can be unset even if flag_fence_wait exists. See
-	 * the expiration check in channel_sync_syncpt_gen_wait_cmd.
+	 * wait_cmd can be unset even if flag_fence_wait exists; the
+	 * android sync framework for example can provide entirely
+	 * empty fences that act like trivially expired waits.
 	 */
 	if (wait_cmd != NULL) {
 		nvgpu_submit_append_priv_cmdbuf(c, wait_cmd);
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
index feefd3bf7..adcdd6026 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
@@ -60,31 +60,21 @@ static int channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c,
 		u32 wait_cmd_size, u32 pos, bool preallocated)
 {
 	int err = 0;
-	bool is_expired = nvgpu_nvhost_syncpt_is_expired_ext(
-			c->g->nvhost, id, thresh);
 
-	if (is_expired) {
-		if (preallocated) {
-			nvgpu_memset(c->g, wait_cmd->mem,
-			(wait_cmd->off + pos * wait_cmd_size) * (u32)sizeof(u32),
-				0, wait_cmd_size * (u32)sizeof(u32));
+	if (!preallocated) {
+		err = nvgpu_channel_alloc_priv_cmdbuf(c,
+			c->g->ops.sync.syncpt.get_wait_cmd_size(),
+			wait_cmd);
+		if (err != 0) {
+			nvgpu_err(c->g, "not enough priv cmd buffer space");
+			return err;
 		}
-	} else {
-		if (!preallocated) {
-			err = nvgpu_channel_alloc_priv_cmdbuf(c,
-				c->g->ops.sync.syncpt.get_wait_cmd_size(),
-				wait_cmd);
-			if (err != 0) {
-				nvgpu_err(c->g, "not enough priv cmd buffer space");
-				return err;
-			}
-		}
-		nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
-			id, c->vm->syncpt_ro_map_gpu_va);
-		c->g->ops.sync.syncpt.add_wait_cmd(c->g, wait_cmd,
-			pos * wait_cmd_size, id, thresh,
-			c->vm->syncpt_ro_map_gpu_va);
 	}
+	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
+		id, c->vm->syncpt_ro_map_gpu_va);
+	c->g->ops.sync.syncpt.add_wait_cmd(c->g, wait_cmd,
+		pos * wait_cmd_size, id, thresh,
+		c->vm->syncpt_ro_map_gpu_va);
 	return 0;
 }
 
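The 16-bit wraparound argument in the commit message can be made concrete with a
small standalone sketch. This is illustrative C only and is not nvgpu or nvhost
code; the expired16()/expired32() helpers are hypothetical names for the usual
serial-number-style comparison assumed here.

/*
 * Illustrative sketch only (not nvgpu/nvhost code): a serial-number
 * style "has this fence expired?" check, assuming the comparison is
 * done modulo the counter width.
 */
#include <stdint.h>
#include <stdio.h>

static int expired16(uint16_t current, uint16_t thresh)
{
	/* expired once current has passed thresh, modulo 2^16 */
	return (int16_t)(uint16_t)(current - thresh) >= 0;
}

static int expired32(uint32_t current, uint32_t thresh)
{
	/* same check, modulo 2^32 */
	return (int32_t)(current - thresh) >= 0;
}

int main(void)
{
	/* Old threshold 100; the counter has since advanced to 40100. */
	printf("16-bit: expired=%d\n", expired16(40100, 100)); /* 0: looks far in the future */
	printf("32-bit: expired=%d\n", expired32(40100, 100)); /* 1: clearly expired */
	return 0;
}

Under this assumption, once a 16-bit counter advances more than half the wrap
range past an old threshold, the old fence looks like a wait far in the future,
which is why a software-side completion check originally mattered; with 32-bit
thresholds the same situation would take about two billion increments, matching
the commit message's rationale for dropping the check.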