gpu: nvgpu: track syncpt max internally

The max values that the Linux nvhost driver tracks are adding some
complexity to our wrapper APIs. Max values are used only for internal
submit syncpoint tracking, so implement that tracking in the sync code
by just storing the last value that the syncpoint will reach after all
jobs are complete.

The value is a simple u32. It's accessed from functions in the submit
path, which is already serialized, so there's no need to worry about
atomic modifications.

Previously nvhost_syncpt_set_min_eq_max_ext() was used to reset the
syncpoint when necessary. Now with the internal max value we'll use
nvhost_syncpt_set_minval(), so add a wrapper for it.

The maxval reported with the user syncpoint allocation is just the
current value at allocation time since no jobs have affected it yet;
there is no means for the kernel to track the max value of user
syncpoints.

Jira NVGPU-5506

Change-Id: I34672eaa7fe3af36b2fbac92d11babe2bc6a2d2b
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2400635
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Hölttä
2020-08-18 15:49:38 +03:00
committed by Alex Waterman
parent b062081c52
commit 5e570610b3
7 changed files with 89 additions and 26 deletions

View File

@@ -51,6 +51,7 @@ struct nvgpu_channel_sync_syncpt {
struct nvgpu_nvhost_dev *nvhost;
u32 id;
struct nvgpu_mem syncpt_buf;
u32 max_thresh;
};
static struct nvgpu_channel_sync_syncpt *
@@ -191,22 +192,22 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
nvgpu_channel_sync_syncpt_from_base(s);
struct nvgpu_channel *c = sp->c;
struct nvgpu_os_fence os_fence = {0};
struct gk20a *g = c->g;
err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q,
c->g->ops.sync.syncpt.get_incr_cmd_size(wfi_cmd),
g->ops.sync.syncpt.get_incr_cmd_size(wfi_cmd),
incr_cmd);
if (err != 0) {
return err;
}
nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
nvgpu_log(g, gpu_dbg_info, "sp->id %d gpu va %llx",
sp->id, sp->syncpt_buf.gpu_va);
c->g->ops.sync.syncpt.add_incr_cmd(c->g, *incr_cmd,
g->ops.sync.syncpt.add_incr_cmd(g, *incr_cmd,
sp->id, sp->syncpt_buf.gpu_va, wfi_cmd);
thresh = nvgpu_wrapping_add_u32(
nvgpu_nvhost_syncpt_read_maxval(sp->nvhost, sp->id),
c->g->ops.sync.syncpt.get_incr_per_release());
thresh = nvgpu_wrapping_add_u32(sp->max_thresh,
g->ops.sync.syncpt.get_incr_per_release());
if (need_sync_fence) {
err = nvgpu_os_fence_syncpt_create(&os_fence, c, sp->nvhost,
@@ -256,9 +257,8 @@ static void channel_sync_syncpt_mark_progress(struct nvgpu_channel_sync *s,
nvgpu_channel_sync_syncpt_from_base(s);
struct nvgpu_channel *c = sp->c;
struct gk20a *g = c->g;
u32 thresh;
thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id,
sp->max_thresh = nvgpu_wrapping_add_u32(sp->max_thresh,
g->ops.sync.syncpt.get_incr_per_release());
if (register_irq) {
@@ -275,7 +275,7 @@ static void channel_sync_syncpt_mark_progress(struct nvgpu_channel_sync *s,
int err = nvgpu_nvhost_intr_register_notifier(
sp->nvhost,
sp->id, thresh,
sp->id, sp->max_thresh,
channel_sync_syncpt_update, c);
if (err != 0) {
nvgpu_channel_put(referenced);
@@ -304,7 +304,8 @@ static void channel_sync_syncpt_set_min_eq_max(struct nvgpu_channel_sync *s)
{
struct nvgpu_channel_sync_syncpt *sp =
nvgpu_channel_sync_syncpt_from_base(s);
nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost, sp->id);
nvgpu_nvhost_syncpt_set_minval(sp->nvhost, sp->id, sp->max_thresh);
}
static u32 channel_sync_syncpt_get_id(struct nvgpu_channel_sync_syncpt *sp)
@@ -320,7 +321,7 @@ static void channel_sync_syncpt_destroy(struct nvgpu_channel_sync *s)
sp->c->g->ops.sync.syncpt.free_buf(sp->c, &sp->syncpt_buf);
nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost, sp->id);
channel_sync_syncpt_set_min_eq_max(s);
nvgpu_nvhost_syncpt_put_ref_ext(sp->nvhost, sp->id);
nvgpu_kfree(sp->c->g, sp);
}
@@ -379,25 +380,35 @@ nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c)
*/
if ((sp->id == 0U) ||
(sp->id == NVGPU_INVALID_SYNCPT_ID)) {
nvgpu_kfree(c->g, sp);
nvgpu_err(c->g, "failed to get free syncpt");
return NULL;
goto err_free;
}
err = sp->c->g->ops.sync.syncpt.alloc_buf(sp->c, sp->id,
&sp->syncpt_buf);
if (err != 0) {
nvgpu_nvhost_syncpt_put_ref_ext(sp->nvhost, sp->id);
nvgpu_kfree(c->g, sp);
nvgpu_err(c->g, "failed to allocate syncpoint buffer");
return NULL;
goto err_put;
}
nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost, sp->id);
err = nvgpu_nvhost_syncpt_read_ext_check(sp->nvhost, sp->id,
&sp->max_thresh);
if (err != 0) {
goto err_free_buf;
}
nvgpu_atomic_set(&sp->base.refcount, 0);
sp->base.ops = &channel_sync_syncpt_ops;
return &sp->base;
err_free_buf:
sp->c->g->ops.sync.syncpt.free_buf(sp->c, &sp->syncpt_buf);
err_put:
nvgpu_nvhost_syncpt_put_ref_ext(sp->nvhost, sp->id);
err_free:
nvgpu_kfree(c->g, sp);
return NULL;
}

View File

@@ -105,8 +105,6 @@ nvgpu_channel_user_syncpt_create(struct nvgpu_channel *ch)
goto err_put;
}
nvgpu_nvhost_syncpt_set_min_eq_max_ext(s->nvhost, s->syncpt_id);
return s;
err_put:
nvgpu_nvhost_syncpt_put_ref_ext(s->nvhost, s->syncpt_id);
@@ -136,7 +134,6 @@ void nvgpu_channel_user_syncpt_destroy(struct nvgpu_channel_user_syncpt *s)
g->ops.sync.syncpt.free_buf(s->ch, &s->syncpt_buf);
nvgpu_nvhost_syncpt_set_min_eq_max_ext(s->nvhost, s->syncpt_id);
nvgpu_nvhost_syncpt_put_ref_ext(s->nvhost, s->syncpt_id);
nvgpu_kfree(g, s);
}

View File

@@ -105,14 +105,25 @@ int nvgpu_nvhost_syncpt_wait_timeout_ext(struct nvgpu_nvhost_dev *nvgpu_syncpt_d
u32 nvgpu_nvhost_syncpt_incr_max_ext(struct nvgpu_nvhost_dev *nvgpu_syncpt_dev,
u32 id, u32 incrs);
int nvgpu_nvhost_syncpt_read_ext_check(struct nvgpu_nvhost_dev *nvgpu_syncpt_dev,
u32 id, u32 *val);
int nvgpu_nvhost_create_symlink(struct gk20a *g);
void nvgpu_nvhost_remove_symlink(struct gk20a *g);
#endif
/**
* @brief Attempt to read the current value of given sync point id.
*
* @param nvgpu_syncpt_dev [in] Sync point device.
* @param id [in] Sync point id.
* @param val [out] Sync point value.
*
* - Read the sync_pt value of the given sync point from hardware.
*
* @return 0 on success.
*/
int nvgpu_nvhost_syncpt_read_ext_check(struct nvgpu_nvhost_dev *nvgpu_syncpt_dev,
u32 id, u32 *val);
/**
* @brief Get the sync_pt name of given sync point id.
*
@@ -147,6 +158,28 @@ void nvgpu_nvhost_syncpt_set_min_eq_max_ext(struct nvgpu_nvhost_dev
*nvgpu_syncpt_dev,
u32 id);
/**
* @brief Increment the value of given sync point to the given value.
*
* @param nvgpu_syncpt_dev [in] Sync point device.
* @param id [in] Sync point id.
* @param val [in] Final desired syncpt value.
*
* - Read the current value of the given sync point by calling
* #NvRmHost1xSyncpointRead().
* - If the current value is less than val, increment the syncpoint
* by the difference (val - current) by calling
* #nvgpu_nvhost_syncptshim_map_increment().
*
* Note that this can race and cause the syncpt value to go over the desired
* value if some other entity (such as the gpu hardware) is incrementing the
* syncpoint concurrently.
*
* @return None.
*/
void nvgpu_nvhost_syncpt_set_minval(struct nvgpu_nvhost_dev *nvgpu_syncpt_dev,
u32 id, u32 val);
/**
* @brief Read the maximum value of given sync point id.
*

View File

@@ -51,6 +51,8 @@ u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id);
void nvgpu_nvhost_syncpt_set_min_eq_max_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id);
void nvgpu_nvhost_syncpt_set_minval(struct nvgpu_nvhost_dev *nvgpu_syncpt_dev,
u32 id, u32 val);
void nvgpu_nvhost_syncpt_put_ref_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id);

View File

@@ -1087,6 +1087,7 @@ static int nvgpu_ioctl_channel_get_user_syncpoint(struct nvgpu_channel *ch,
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
struct gk20a *g = ch->g;
int err;
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) {
nvgpu_err(g, "user syncpoints not supported");
@@ -1116,8 +1117,17 @@ static int nvgpu_ioctl_channel_get_user_syncpoint(struct nvgpu_channel *ch,
}
args->syncpoint_id = nvgpu_channel_user_syncpt_get_id(ch->user_sync);
args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost,
args->syncpoint_id);
/* The current value is the max we're expecting at the moment */
err = nvgpu_nvhost_syncpt_read_ext_check(g->nvhost, args->syncpoint_id,
&args->syncpoint_max);
if (err != 0) {
nvgpu_mutex_acquire(&ch->sync_lock);
nvgpu_channel_user_syncpt_destroy(ch->user_sync);
nvgpu_mutex_release(&ch->sync_lock);
return err;
}
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS)) {
args->gpu_va =
nvgpu_channel_user_syncpt_get_address(ch->user_sync);

View File

@@ -144,6 +144,12 @@ void nvgpu_nvhost_syncpt_set_min_eq_max_ext(
nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id);
}
/*
 * Thin wrapper around nvhost_syncpt_set_minval().
 *
 * Forwards the sync point id and the requested value unchanged, using the
 * host1x platform device stored in nvhost_dev. Nothing is returned to the
 * caller.
 */
void nvgpu_nvhost_syncpt_set_minval(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 val)
{
nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val);
}
void nvgpu_nvhost_syncpt_put_ref_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
@@ -199,7 +205,6 @@ void nvgpu_nvhost_syncpt_set_safe_state(
} else {
val += 0x10000;
nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val);
nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, val);
}
}

View File

@@ -112,6 +112,11 @@ void nvgpu_nvhost_syncpt_set_min_eq_max_ext(
{
}
/*
 * No-op implementation of nvgpu_nvhost_syncpt_set_minval().
 *
 * The body is empty: all three arguments are ignored and no sync point
 * state is touched.
 */
void nvgpu_nvhost_syncpt_set_minval(struct nvgpu_nvhost_dev *nvgpu_syncpt_dev,
u32 id, u32 val)
{
}
void nvgpu_nvhost_syncpt_put_ref_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{