gpu: nvgpu: reorganize gpfifo writes in submit

Reduce the number of branches and make the code flow more
straightforward by having two complete paths for the gpfifo entry
writes: one when job tracking is done and another when not. Although
this adds some very minor duplication (of the user gpfifo append call),
this way it's easier to read what happens to the job metadata, and when
do we even have one.

Jira NVGPU-4548

Change-Id: I6be8bc5afaf139e7c49d5e44837e04f642dd5721
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2321761
(cherry picked from commit 9a3d3c8d556d563b9d67b370636791d6a1dd57ee)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2324253
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Hölttä
2020-03-31 17:52:02 +03:00
committed by Alex Waterman
parent 550d45430f
commit 62955ec7f1

View File

@@ -117,6 +117,7 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
}
if (job->wait_cmd->valid) {
/* not expired yet */
*wait_cmd = job->wait_cmd;
}
}
@@ -378,7 +379,6 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
struct gk20a *g = c->g;
struct priv_cmd_entry *wait_cmd = NULL;
struct priv_cmd_entry *incr_cmd = NULL;
struct nvgpu_fence_type *post_fence = NULL;
struct nvgpu_channel_job *job = NULL;
/* we might need two extra gpfifo entries - one for pre fence
* and one for post fence. */
@@ -555,6 +555,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
}
if (need_job_tracking) {
struct nvgpu_fence_type *post_fence = NULL;
err = nvgpu_channel_alloc_job(c, &job);
if (err != 0) {
goto clean_up;
@@ -568,38 +570,48 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
if (err != 0) {
goto clean_up_job;
}
}
nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);
nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);
if (wait_cmd != NULL) {
nvgpu_submit_append_priv_cmdbuf(c, wait_cmd);
}
/*
* wait_cmd can be unset even if flag_fence_wait exists. See
* the expiration check in channel_sync_syncpt_gen_wait_cmd.
*/
if (wait_cmd != NULL) {
nvgpu_submit_append_priv_cmdbuf(c, wait_cmd);
}
err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
num_entries);
if (err != 0) {
goto clean_up_job;
}
/*
* And here's where we add the incr_cmd we generated earlier. It should
* always run!
*/
if (incr_cmd != NULL) {
nvgpu_submit_append_priv_cmdbuf(c, incr_cmd);
}
if (fence_out != NULL) {
*fence_out = nvgpu_fence_get(post_fence);
}
if (need_job_tracking) {
err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting);
err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
num_entries);
if (err != 0) {
nvgpu_fence_put(post_fence);
goto clean_up_job;
}
nvgpu_submit_append_priv_cmdbuf(c, incr_cmd);
err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting);
if (err != 0) {
nvgpu_fence_put(post_fence);
goto clean_up_job;
}
if (fence_out != NULL) {
*fence_out = nvgpu_fence_get(post_fence);
}
} else {
nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);
err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
num_entries);
if (err != 0) {
goto clean_up;
}
if (fence_out != NULL) {
*fence_out = NULL;
}
}
nvgpu_profile_snapshot(profile, PROFILE_APPEND);
g->ops.userd.gp_put(g, c);
@@ -612,12 +624,16 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
#endif
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_channel_submitted_gpfifo(g->name,
c->chid,
num_entries,
flags,
post_fence ? post_fence->syncpt_id : 0,
post_fence ? post_fence->syncpt_value : 0);
if (fence_out != NULL && *fence_out != NULL) {
trace_gk20a_channel_submitted_gpfifo(g->name,
c->chid, num_entries, flags,
(*fence_out)->syncpt_id,
(*fence_out)->syncpt_value);
} else {
trace_gk20a_channel_submitted_gpfifo(g->name,
c->chid, num_entries, flags,
0, 0);
}
#endif
nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
@@ -632,7 +648,6 @@ clean_up_job:
nvgpu_channel_free_job(c, job);
clean_up:
nvgpu_log_fn(g, "fail");
nvgpu_fence_put(post_fence);
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (c->deterministic) {
nvgpu_rwsem_up_read(&g->deterministic_busy);