From 62955ec7f1a4a59f07aa5542a485ad952936b3a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Konsta=20H=C3=B6ltt=C3=A4?= Date: Tue, 31 Mar 2020 17:52:02 +0300 Subject: [PATCH] gpu: nvgpu: reorganize gpfifo writes in submit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce the number of branches and make the code flow more straightforward by having two complete paths for the gpfifo entry writes: one when job tracking is done and another when not. Although this adds some very minor duplication (of the user gpfifo append call), this way it's easier to read what happens to the job metadata, and when we even have one. Jira NVGPU-4548 Change-Id: I6be8bc5afaf139e7c49d5e44837e04f642dd5721 Signed-off-by: Konsta Hölttä Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2321761 (cherry picked from commit 9a3d3c8d556d563b9d67b370636791d6a1dd57ee) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2324253 Tested-by: mobile promotions Reviewed-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/submit.c | 81 +++++++++++++++----------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index ca8be9fb5..dd3705398 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -117,6 +117,7 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, } if (job->wait_cmd->valid) { + /* not expired yet */ *wait_cmd = job->wait_cmd; } } @@ -378,7 +379,6 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, struct gk20a *g = c->g; struct priv_cmd_entry *wait_cmd = NULL; struct priv_cmd_entry *incr_cmd = NULL; - struct nvgpu_fence_type *post_fence = NULL; struct nvgpu_channel_job *job = NULL; /* we might need two extra gpfifo entries - one for pre fence * and one for post fence.
*/ @@ -555,6 +555,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, } if (need_job_tracking) { + struct nvgpu_fence_type *post_fence = NULL; + err = nvgpu_channel_alloc_job(c, &job); if (err != 0) { goto clean_up; @@ -568,38 +570,48 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, if (err != 0) { goto clean_up_job; } - } - nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING); + nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING); - if (wait_cmd != NULL) { - nvgpu_submit_append_priv_cmdbuf(c, wait_cmd); - } + /* + * wait_cmd can be unset even if flag_fence_wait exists. See + * the expiration check in channel_sync_syncpt_gen_wait_cmd. + */ + if (wait_cmd != NULL) { + nvgpu_submit_append_priv_cmdbuf(c, wait_cmd); + } - err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, - num_entries); - if (err != 0) { - goto clean_up_job; - } - - /* - * And here's where we add the incr_cmd we generated earlier. It should - * always run! - */ - if (incr_cmd != NULL) { - nvgpu_submit_append_priv_cmdbuf(c, incr_cmd); - } - - if (fence_out != NULL) { - *fence_out = nvgpu_fence_get(post_fence); - } - - if (need_job_tracking) { - err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting); + err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, + num_entries); if (err != 0) { + nvgpu_fence_put(post_fence); goto clean_up_job; } + + nvgpu_submit_append_priv_cmdbuf(c, incr_cmd); + + err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting); + if (err != 0) { + nvgpu_fence_put(post_fence); + goto clean_up_job; + } + + if (fence_out != NULL) { + *fence_out = nvgpu_fence_get(post_fence); + } + } else { + nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING); + + err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, + num_entries); + if (err != 0) { + goto clean_up; + } + if (fence_out != NULL) { + *fence_out = NULL; + } } + nvgpu_profile_snapshot(profile, PROFILE_APPEND); g->ops.userd.gp_put(g, c); @@ -612,12 +624,16 @@ static int 
nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, #endif #ifdef CONFIG_NVGPU_TRACE - trace_gk20a_channel_submitted_gpfifo(g->name, - c->chid, - num_entries, - flags, - post_fence ? post_fence->syncpt_id : 0, - post_fence ? post_fence->syncpt_value : 0); + if (fence_out != NULL && *fence_out != NULL) { + trace_gk20a_channel_submitted_gpfifo(g->name, + c->chid, num_entries, flags, + (*fence_out)->syncpt_id, + (*fence_out)->syncpt_value); + } else { + trace_gk20a_channel_submitted_gpfifo(g->name, + c->chid, num_entries, flags, + 0, 0); + } #endif nvgpu_log_info(g, "post-submit put %d, get %d, size %d", @@ -632,7 +648,6 @@ clean_up_job: nvgpu_channel_free_job(c, job); clean_up: nvgpu_log_fn(g, "fail"); - nvgpu_fence_put(post_fence); #ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS if (c->deterministic) { nvgpu_rwsem_up_read(&g->deterministic_busy);