gpu: nvgpu: split sema sync hal to wait and incr

Instead of one HAL op with a boolean flag to decide whether to do one
thing or another entirely different thing, use two separate HAL ops for
filling priv cmd bufs with semaphore wait and semaphore increment
commands. It's already two ops for syncpoints, and explicit commands are
more readable than boolean flags.
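
For reference, the resulting pair of ops in gops_sync_sema looks like
this (signatures copied from the interface hunk at the end of this
diff):

  void (*add_wait_cmd)(struct gk20a *g,
  		struct priv_cmd_entry *cmd, u32 off,
  		struct nvgpu_semaphore *s, u64 sema_va);
  void (*add_incr_cmd)(struct gk20a *g,
  		struct priv_cmd_entry *cmd,
  		struct nvgpu_semaphore *s, u64 sema_va,
  		bool wfi);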

Change the offset passed into the sema wait HAL to be relative to the
cmdbuf entry, so that the HAL itself adds the cmdbuf internal offset
(cmd->off) to it.
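
A minimal sketch of the new convention, taken from the gk20a wait HAL
below (callers no longer pre-add cmd->off themselves):

  /* in gk20a_sema_add_wait_cmd(): off arrives relative to the entry */
  off = cmd->off + off;
  off = gk20a_sema_add_header(g, cmd, off, sema_va);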

While at it, modify the syncpoint cmdbuf HAL ops' prototypes to be
consistent.
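
Concretely, the syncpt incr op moves its wfi flag to the end so that
the syncpt and sema prototypes line up (before/after taken from the
gops_sync hunk below):

  /* before */
  void (*add_incr_cmd)(struct gk20a *g, bool wfi_cmd,
  		struct priv_cmd_entry *cmd, u32 id, u64 gpu_va);
  /* after */
  void (*add_incr_cmd)(struct gk20a *g, struct priv_cmd_entry *cmd,
  		u32 id, u64 gpu_va, bool wfi);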

Jira NVGPU-4548

Change-Id: Ibac1fc5fe2ef113e4e16b56358ecfa8904464c82
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2323319
(cherry picked from commit 08c1fa38c0fe4effe6ff7a992af55f46e03e77d0)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2328409
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Konsta Hölttä
Date:      2020-04-03 14:04:02 +03:00
Committer: Alex Waterman
Commit:    6202ead057 (parent 6a7bf6cdc0)

17 changed files with 149 additions and 96 deletions

@@ -61,11 +61,8 @@ static void add_sema_cmd(struct gk20a *g, struct nvgpu_channel *c,
 		u32 offset, bool acquire, bool wfi)
 {
 	int ch = c->chid;
-	u32 ob, off = cmd->off + offset;
 	u64 va;
 
-	ob = off;
-
 	/*
 	 * RO for acquire (since we just need to read the mem) and RW for
 	 * release since we will need to write back to the semaphore memory.
@@ -81,21 +78,21 @@ static void add_sema_cmd(struct gk20a *g, struct nvgpu_channel *c,
 		nvgpu_semaphore_prepare(s, c->hw_sema);
 	}
 
-	g->ops.sync.sema.add_cmd(g, s, va, cmd, off, acquire, wfi);
-
 	if (acquire) {
+		g->ops.sync.sema.add_wait_cmd(g, cmd, offset, s, va);
 		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3llu"
 				"va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				ch, nvgpu_semaphore_get_value(s),
 				nvgpu_semaphore_get_hw_pool_page_idx(s),
-				va, cmd->gva, cmd->mem->gpu_va, ob);
+				va, cmd->gva, cmd->mem->gpu_va, offset);
 	} else {
+		g->ops.sync.sema.add_incr_cmd(g, cmd, s, va, wfi);
 		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3llu"
 				"va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				ch, nvgpu_semaphore_get_value(s),
 				nvgpu_semaphore_read(s),
 				nvgpu_semaphore_get_hw_pool_page_idx(s),
-				va, cmd->gva, cmd->mem->gpu_va, ob);
+				va, cmd->gva, cmd->mem->gpu_va, offset);
 	}
 }

@@ -195,8 +195,8 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
 	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
 		sp->id, sp->syncpt_buf.gpu_va);
 
-	c->g->ops.sync.syncpt.add_incr_cmd(c->g, wfi_cmd,
-			incr_cmd, sp->id, sp->syncpt_buf.gpu_va);
+	c->g->ops.sync.syncpt.add_incr_cmd(c->g, incr_cmd,
+			sp->id, sp->syncpt_buf.gpu_va, wfi_cmd);
 
 	thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id,
 			c->g->ops.sync.syncpt.get_incr_per_release());

@@ -739,9 +739,10 @@ static const struct gpu_ops gm20b_ops = {
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 			.sema = {
+				.add_wait_cmd = gk20a_sema_add_wait_cmd,
 				.get_wait_cmd_size = gk20a_sema_get_wait_cmd_size,
+				.add_incr_cmd = gk20a_sema_add_incr_cmd,
 				.get_incr_cmd_size = gk20a_sema_get_incr_cmd_size,
-				.add_cmd = gk20a_sema_add_cmd,
 			},
 #endif
 		},

@@ -837,9 +837,10 @@ static const struct gpu_ops gp10b_ops = {
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 			.sema = {
+				.add_wait_cmd = gk20a_sema_add_wait_cmd,
 				.get_wait_cmd_size = gk20a_sema_get_wait_cmd_size,
+				.add_incr_cmd = gk20a_sema_add_incr_cmd,
 				.get_incr_cmd_size = gk20a_sema_get_incr_cmd_size,
-				.add_cmd = gk20a_sema_add_cmd,
 			},
 #endif
 		},

@@ -1041,9 +1041,10 @@ NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7))
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_SW_SEMAPHORE
 			.sema = {
+				.add_wait_cmd = gv11b_sema_add_wait_cmd,
 				.get_wait_cmd_size = gv11b_sema_get_wait_cmd_size,
+				.add_incr_cmd = gv11b_sema_add_incr_cmd,
 				.get_incr_cmd_size = gv11b_sema_get_incr_cmd_size,
-				.add_cmd = gv11b_sema_add_cmd,
 			},
 #endif
 		},

@@ -1067,9 +1067,10 @@ static const struct gpu_ops tu104_ops = {
 #if defined(CONFIG_NVGPU_KERNEL_MODE_SUBMIT) && \
 	defined(CONFIG_NVGPU_SW_SEMAPHORE)
 			.sema = {
+				.add_wait_cmd = gv11b_sema_add_wait_cmd,
 				.get_wait_cmd_size = gv11b_sema_get_wait_cmd_size,
+				.add_incr_cmd = gv11b_sema_add_incr_cmd,
 				.get_incr_cmd_size = gv11b_sema_get_incr_cmd_size,
-				.add_cmd = gv11b_sema_add_cmd,
 			},
 #endif
 		},

@@ -40,12 +40,10 @@ u32 gk20a_sema_get_incr_cmd_size(void)
 	return 10U;
 }
 
-void gk20a_sema_add_cmd(struct gk20a *g, struct nvgpu_semaphore *s,
-		u64 sema_va, struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi)
+static u32 gk20a_sema_add_header(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		u64 sema_va)
 {
-	nvgpu_log_fn(g, " ");
-
 	/* semaphore_a */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004U);
 	/* offset_upper */
@@ -55,30 +53,53 @@ void gk20a_sema_add_cmd(struct gk20a *g, struct nvgpu_semaphore *s,
 	/* offset */
 	nvgpu_mem_wr32(g, cmd->mem, off++, (u32)sema_va & 0xffffffff);
 
-	if (acquire) {
-		/* semaphore_c */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
-		/* payload */
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			nvgpu_semaphore_get_value(s));
-		/* semaphore_d */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
-		/* operation: acq_geq, switch_en */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x4U | BIT32(12));
-	} else {
-		/* semaphore_c */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
-		/* payload */
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			nvgpu_semaphore_get_value(s));
-		/* semaphore_d */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
-		/* operation: release, wfi */
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			0x2UL | ((wfi ? 0x0UL : 0x1UL) << 20));
-		/* non_stall_int */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008U);
-		/* ignored */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0U);
-	}
+	return off;
+}
+
+void gk20a_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va)
+{
+	nvgpu_log_fn(g, " ");
+
+	off = cmd->off + off;
+	off = gk20a_sema_add_header(g, cmd, off, sema_va);
+
+	/* semaphore_c */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
+	/* payload */
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		nvgpu_semaphore_get_value(s));
+	/* semaphore_d */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
+	/* operation: acq_geq, switch_en */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x4U | BIT32(12));
+}
+
+void gk20a_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi)
+{
+	u32 off = cmd->off;
+
+	nvgpu_log_fn(g, " ");
+
+	off = gk20a_sema_add_header(g, cmd, off, sema_va);
+
+	/* semaphore_c */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
+	/* payload */
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		nvgpu_semaphore_get_value(s));
+	/* semaphore_d */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
+	/* operation: release, wfi */
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		0x2UL | ((wfi ? 0x0UL : 0x1UL) << 20));
+	/* non_stall_int */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008U);
+	/* ignored */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0U);
+}

@@ -30,8 +30,12 @@ struct nvgpu_semaphore;
 
 u32 gk20a_sema_get_wait_cmd_size(void);
 u32 gk20a_sema_get_incr_cmd_size(void);
-void gk20a_sema_add_cmd(struct gk20a *g, struct nvgpu_semaphore *s,
-		u64 sema_va, struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi);
+void gk20a_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va);
+void gk20a_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi);
 
 #endif /* NVGPU_SYNC_SEMA_CMDBUF_GK20A_H */

@@ -40,13 +40,10 @@ u32 gv11b_sema_get_incr_cmd_size(void)
 	return 12U;
 }
 
-void gv11b_sema_add_cmd(struct gk20a *g,
-		struct nvgpu_semaphore *s, u64 sema_va,
-		struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi)
+static u32 gv11b_sema_add_header(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va)
 {
-	nvgpu_log_fn(g, " ");
-
 	/* sema_addr_lo */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017);
 	nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffffULL);
@@ -63,18 +60,40 @@ void gv11b_sema_add_cmd(struct gk20a *g,
 	/* sema_payload_hi : ignored */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a);
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0);
 
-	if (acquire) {
-		/* sema_execute : acq_strict_geq | switch_en | 32bit */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
-		nvgpu_mem_wr32(g, cmd->mem, off++, U32(0x2) | BIT32(12));
-	} else {
-		/* sema_execute : release | wfi | 32bit */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			U32(0x1) | ((wfi ? U32(0x1) : U32(0x0)) << 20U));
-		/* non_stall_int : payload is ignored */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0);
-	}
+	return off;
+}
+
+void gv11b_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va)
+{
+	nvgpu_log_fn(g, " ");
+
+	off = cmd->off + off;
+	off = gv11b_sema_add_header(g, cmd, off, s, sema_va);
+
+	/* sema_execute : acq_strict_geq | switch_en | 32bit */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
+	nvgpu_mem_wr32(g, cmd->mem, off++, U32(0x2) | BIT32(12));
+}
+
+void gv11b_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi)
+{
+	u32 off = cmd->off;
+
+	nvgpu_log_fn(g, " ");
+
+	off = gv11b_sema_add_header(g, cmd, off, s, sema_va);
+
+	/* sema_execute : release | wfi | 32bit */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		U32(0x1) | ((wfi ? U32(0x1) : U32(0x0)) << 20U));
+	/* non_stall_int : payload is ignored */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0);
+}

@@ -30,9 +30,12 @@ struct nvgpu_semaphore;
 
 u32 gv11b_sema_get_wait_cmd_size(void);
 u32 gv11b_sema_get_incr_cmd_size(void);
-void gv11b_sema_add_cmd(struct gk20a *g,
-		struct nvgpu_semaphore *s, u64 sema_va,
-		struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi);
+void gv11b_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va);
+void gv11b_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi);
 
 #endif /* NVGPU_SYNC_SEMA_CMDBUF_GV11B_H */

@@ -32,7 +32,7 @@
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
 		struct priv_cmd_entry *cmd, u32 off,
-		u32 id, u32 thresh, u64 gpu_va)
+		u32 id, u32 thresh, u64 gpu_va_base)
 {
 	nvgpu_log_fn(g, " ");
 
@@ -58,13 +58,13 @@ u32 gk20a_syncpt_get_incr_per_release(void)
 }
 
 void gk20a_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 	u32 off = cmd->off;
 
 	nvgpu_log_fn(g, " ");
 
-	if (wfi_cmd) {
+	if (wfi) {
 		/* wfi */
 		nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001EU);
 		/* handle, ignored */

@@ -33,12 +33,12 @@ struct nvgpu_mem;
 
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
 		struct priv_cmd_entry *cmd, u32 off,
-		u32 id, u32 thresh, u64 gpu_va);
+		u32 id, u32 thresh, u64 gpu_va_base);
 u32 gk20a_syncpt_get_wait_cmd_size(void);
 u32 gk20a_syncpt_get_incr_per_release(void);
 void gk20a_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va);
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi);
 u32 gk20a_syncpt_get_incr_cmd_size(bool wfi_cmd);
 #endif
@@ -53,7 +53,7 @@ int gk20a_syncpt_alloc_buf(struct nvgpu_channel *c,
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 static inline void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
 		struct priv_cmd_entry *cmd, u32 off,
-		u32 id, u32 thresh, u64 gpu_va)
+		u32 id, u32 thresh, u64 gpu_va_base)
 {
 }
@@ -65,8 +65,8 @@ static inline u32 gk20a_syncpt_get_incr_per_release(void)
 	return 0U;
 }
 
 static inline void gk20a_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 }
@@ -87,4 +87,4 @@ static inline int gk20a_syncpt_alloc_buf(struct nvgpu_channel *c,
 #endif
 
-#endif /* NVGPU_SYNC_SYNCPT_CMDBUF_GK20A_H */
+#endif /* NVGPU_SYNC_SYNCPT_CMDBUF_GK20A_H */

@@ -81,8 +81,8 @@ u32 gv11b_syncpt_get_incr_per_release(void)
 }
 
 void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 	u32 off = cmd->off;
 
@@ -109,7 +109,7 @@ void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
 	/* sema_execute : release | wfi | 32bit */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
 	nvgpu_mem_wr32(g, cmd->mem, off, (0x1U |
-		((u32)(wfi_cmd ? 0x1U : 0x0U) << 20U)));
+		((u32)(wfi ? 0x1U : 0x0U) << 20U)));
 }
 
 u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd)

@@ -40,8 +40,8 @@ void gv11b_syncpt_add_wait_cmd(struct gk20a *g,
 u32 gv11b_syncpt_get_wait_cmd_size(void);
 u32 gv11b_syncpt_get_incr_per_release(void);
 void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va);
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi);
 u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd);
 #endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
@@ -71,8 +71,8 @@ static inline u32 gv11b_syncpt_get_incr_per_release(void)
 	return 0U;
 }
 
 static inline void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 }
 
 static inline u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd)

@@ -541,9 +541,10 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 			.sema = {
+				.add_wait_cmd = gk20a_sema_add_wait_cmd,
 				.get_wait_cmd_size = gk20a_sema_get_wait_cmd_size,
+				.add_incr_cmd = gk20a_sema_add_incr_cmd,
 				.get_incr_cmd_size = gk20a_sema_get_incr_cmd_size,
-				.add_cmd = gk20a_sema_add_cmd,
 			},
 #endif
 		},

@@ -661,9 +661,10 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 #endif
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 			.sema = {
+				.add_wait_cmd = gv11b_sema_add_wait_cmd,
 				.get_wait_cmd_size = gv11b_sema_get_wait_cmd_size,
+				.add_incr_cmd = gv11b_sema_add_incr_cmd,
 				.get_incr_cmd_size = gv11b_sema_get_incr_cmd_size,
-				.add_cmd = gv11b_sema_add_cmd,
 			},
 #endif
 		},

@@ -77,12 +77,12 @@ struct gops_sync {
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 		void (*add_wait_cmd)(struct gk20a *g,
 				struct priv_cmd_entry *cmd, u32 off,
-				u32 id, u32 thresh, u64 gpu_va);
+				u32 id, u32 thresh, u64 gpu_va_base);
 		u32 (*get_wait_cmd_size)(void);
 		void (*add_incr_cmd)(struct gk20a *g,
-				bool wfi_cmd,
 				struct priv_cmd_entry *cmd,
-				u32 id, u64 gpu_va);
+				u32 id, u64 gpu_va,
+				bool wfi);
 		u32 (*get_incr_cmd_size)(bool wfi_cmd);
 		u32 (*get_incr_per_release)(void);
 #endif
@@ -96,10 +96,13 @@ struct gops_sync {
 	struct gops_sync_sema {
 		u32 (*get_wait_cmd_size)(void);
 		u32 (*get_incr_cmd_size)(void);
-		void (*add_cmd)(struct gk20a *g,
-				struct nvgpu_semaphore *s, u64 sema_va,
+		void (*add_wait_cmd)(struct gk20a *g,
+				struct priv_cmd_entry *cmd, u32 off,
+				struct nvgpu_semaphore *s, u64 sema_va);
+		void (*add_incr_cmd)(struct gk20a *g,
 				struct priv_cmd_entry *cmd,
-				u32 off, bool acquire, bool wfi);
+				struct nvgpu_semaphore *s, u64 sema_va,
+				bool wfi);
 	} sema;
 /** @endcond DOXYGEN_SHOULD_SKIP_THIS */
 #endif