diff --git a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c index 017d2601d..2868ba370 100644 --- a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c +++ b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c @@ -170,6 +170,7 @@ int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size, return -EAGAIN; } + e->fill_off = 0; e->size = orig_size; e->mem = &q->mem; @@ -237,3 +238,21 @@ void nvgpu_channel_update_priv_cmd_q_and_free_entry( nvgpu_channel_free_priv_cmd_entry(ch, e); } + +void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e, + u32 *data, u32 entries) +{ + nvgpu_assert(e->fill_off + entries <= e->size); + nvgpu_mem_wr_n(g, e->mem, (e->off + e->fill_off) * sizeof(u32), + data, entries * sizeof(u32)); + e->fill_off += entries; +} + +void nvgpu_priv_cmdbuf_append_zeros(struct gk20a *g, struct priv_cmd_entry *e, + u32 entries) +{ + nvgpu_assert(e->fill_off + entries <= e->size); + nvgpu_memset(g, e->mem, (e->off + e->fill_off) * sizeof(u32), + 0, entries * sizeof(u32)); + e->fill_off += entries; +} diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c index 02ea03230..4d3b670ec 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c @@ -57,8 +57,7 @@ nvgpu_channel_sync_semaphore_from_base(struct nvgpu_channel_sync *base) } static void add_sema_wait_cmd(struct gk20a *g, struct nvgpu_channel *c, - struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd, - u32 offset) + struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd) { int ch = c->chid; u64 va; @@ -66,12 +65,12 @@ static void add_sema_wait_cmd(struct gk20a *g, struct nvgpu_channel *c, /* acquire just needs to read the mem. */ va = nvgpu_semaphore_gpu_ro_va(s); - g->ops.sync.sema.add_wait_cmd(g, cmd, offset, s, va); + g->ops.sync.sema.add_wait_cmd(g, cmd, s, va); gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3llu" - "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", + "va=0x%llx cmd_mem=0x%llx b=0x%llx", ch, nvgpu_semaphore_get_value(s), nvgpu_semaphore_get_hw_pool_page_idx(s), - va, cmd->gva, cmd->mem->gpu_va, offset); + va, cmd->gva, cmd->mem->gpu_va); } static void add_sema_incr_cmd(struct gk20a *g, struct nvgpu_channel *c, @@ -98,20 +97,17 @@ static void add_sema_incr_cmd(struct gk20a *g, struct nvgpu_channel *c, static void channel_sync_semaphore_gen_wait_cmd(struct nvgpu_channel *c, struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd, - u32 wait_cmd_size, u32 pos) + u32 wait_cmd_size) { bool has_incremented; if (sema == NULL) { - /* expired */ - nvgpu_memset(c->g, wait_cmd->mem, - (wait_cmd->off + pos * wait_cmd_size) * (u32)sizeof(u32), - 0, wait_cmd_size * (u32)sizeof(u32)); + /* came from an expired sync fence */ + nvgpu_priv_cmdbuf_append_zeros(c->g, wait_cmd, wait_cmd_size); } else { has_incremented = nvgpu_semaphore_can_wait(sema); nvgpu_assert(has_incremented); - add_sema_wait_cmd(c->g, c, sema, wait_cmd, - pos * wait_cmd_size); + add_sema_wait_cmd(c->g, c, sema, wait_cmd); nvgpu_semaphore_put(sema); } } @@ -163,7 +159,7 @@ static int channel_sync_semaphore_wait_fd( nvgpu_os_fence_sema_extract_nth_semaphore( &os_fence_sema, i, &semaphore); channel_sync_semaphore_gen_wait_cmd(c, semaphore, entry, - wait_cmd_size, i); + wait_cmd_size); } cleanup: diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c index f228a10ed..e91e94187 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c @@ -58,7 +58,7 @@ nvgpu_channel_sync_syncpt_from_base(struct nvgpu_channel_sync *base) static int channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c, u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd, - u32 wait_cmd_size, u32 pos, bool preallocated) + u32 wait_cmd_size, bool preallocated) { int err = 0; @@ -73,9 +73,8 @@ static int channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c, } nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx", id, c->vm->syncpt_ro_map_gpu_va); - c->g->ops.sync.syncpt.add_wait_cmd(c->g, wait_cmd, - pos * wait_cmd_size, id, thresh, - c->vm->syncpt_ro_map_gpu_va); + c->g->ops.sync.syncpt.add_wait_cmd(c->g, wait_cmd, id, thresh, + c->vm->syncpt_ro_map_gpu_va); return 0; } @@ -92,7 +91,7 @@ static int channel_sync_syncpt_wait_raw(struct nvgpu_channel_sync_syncpt *s, } err = channel_sync_syncpt_gen_wait_cmd(c, id, thresh, - wait_cmd, wait_cmd_size, 0, false); + wait_cmd, wait_cmd_size, false); return err; } @@ -154,7 +153,7 @@ static int channel_sync_syncpt_wait_fd(struct nvgpu_channel_sync *s, int fd, nvgpu_os_fence_syncpt_extract_nth_syncpt( &os_fence_syncpt, i, &syncpt_id, &syncpt_thresh); err = channel_sync_syncpt_gen_wait_cmd(c, syncpt_id, - syncpt_thresh, wait_cmd, wait_cmd_size, i, true); + syncpt_thresh, wait_cmd, wait_cmd_size, true); } cleanup: @@ -384,5 +383,3 @@ nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c) return &sp->base; } - - diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c index f5b3db27f..4997ae32c 100644 --- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c +++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c @@ -21,11 +21,9 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ + #include -#include #include -#include -#include #include #include "sema_cmdbuf_gk20a.h" @@ -40,66 +38,66 @@ u32 gk20a_sema_get_incr_cmd_size(void) return 10U; } -static u32 gk20a_sema_add_header(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, - u64 sema_va) +static void gk20a_sema_add_header(struct gk20a *g, + struct priv_cmd_entry *cmd, u64 sema_va) { - /* semaphore_a */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004U); - /* offset_upper */ - nvgpu_mem_wr32(g, cmd->mem, off++, (u32)(sema_va >> 32) & 0xffU); - /* semaphore_b */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005U); - /* offset */ - nvgpu_mem_wr32(g, cmd->mem, off++, (u32)sema_va & 0xffffffff); + u32 data[] = { + /* semaphore_a */ + 0x20010004U, + /* offset_upper */ + (u32)(sema_va >> 32) & 0xffU, + /* semaphore_b */ + 0x20010005U, + /* offset */ + (u32)sema_va & 0xffffffff, + }; - return off; + nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data)); } void gk20a_sema_add_wait_cmd(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, struct nvgpu_semaphore *s, u64 sema_va) { + u32 data[] = { + /* semaphore_c */ + 0x20010006U, + /* payload */ + nvgpu_semaphore_get_value(s), + /* semaphore_d */ + 0x20010007U, + /* operation: acq_geq, switch_en */ + 0x4U | BIT32(12), + }; + nvgpu_log_fn(g, " "); - off = cmd->off + off; - off = gk20a_sema_add_header(g, cmd, off, sema_va); - - /* semaphore_c */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U); - /* payload */ - nvgpu_mem_wr32(g, cmd->mem, off++, - nvgpu_semaphore_get_value(s)); - /* semaphore_d */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U); - /* operation: acq_geq, switch_en */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x4U | BIT32(12)); + gk20a_sema_add_header(g, cmd, sema_va); + nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data)); } void gk20a_sema_add_incr_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, struct nvgpu_semaphore *s, u64 sema_va, bool wfi) - { - u32 off = cmd->off; + u32 data[] = { + /* semaphore_c */ + 0x20010006U, + /* payload */ + nvgpu_semaphore_get_value(s), + /* semaphore_d */ + 0x20010007U, + /* operation: release, wfi */ + 0x2UL | ((wfi ? 0x0UL : 0x1UL) << 20), + /* non_stall_int */ + 0x20010008U, + /* ignored */ + 0U, + }; nvgpu_log_fn(g, " "); - off = gk20a_sema_add_header(g, cmd, off, sema_va); - - /* semaphore_c */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U); - /* payload */ - nvgpu_mem_wr32(g, cmd->mem, off++, - nvgpu_semaphore_get_value(s)); - /* semaphore_d */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U); - /* operation: release, wfi */ - nvgpu_mem_wr32(g, cmd->mem, off++, - 0x2UL | ((wfi ? 0x0UL : 0x1UL) << 20)); - /* non_stall_int */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008U); - /* ignored */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0U); + gk20a_sema_add_header(g, cmd, sema_va); + nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data)); } diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.h b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.h index 1e80d7821..82cfc19ab 100644 --- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.h +++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.h @@ -31,7 +31,7 @@ struct nvgpu_semaphore; u32 gk20a_sema_get_wait_cmd_size(void); u32 gk20a_sema_get_incr_cmd_size(void); void gk20a_sema_add_wait_cmd(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, struct nvgpu_semaphore *s, u64 sema_va); void gk20a_sema_add_incr_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c index 01c4dcbb2..d66dca521 100644 --- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c +++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c @@ -22,10 +22,8 @@ * DEALINGS IN THE SOFTWARE. */ -#include +#include #include -#include -#include #include #include "sema_cmdbuf_gv11b.h" @@ -40,41 +38,45 @@ u32 gv11b_sema_get_incr_cmd_size(void) return 12U; } -static u32 gv11b_sema_add_header(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, +static void gv11b_sema_add_header(struct gk20a *g, + struct priv_cmd_entry *cmd, struct nvgpu_semaphore *s, u64 sema_va) { - /* sema_addr_lo */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017); - nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffffULL); + u32 data[] = { + /* sema_addr_lo */ + 0x20010017, + sema_va & 0xffffffffULL, - /* sema_addr_hi */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010018); - nvgpu_mem_wr32(g, cmd->mem, off++, (sema_va >> 32ULL) & 0xffULL); + /* sema_addr_hi */ + 0x20010018, + (sema_va >> 32ULL) & 0xffULL, - /* payload_lo */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010019); - nvgpu_mem_wr32(g, cmd->mem, off++, nvgpu_semaphore_get_value(s)); + /* payload_lo */ + 0x20010019, + nvgpu_semaphore_get_value(s), - /* payload_hi : ignored */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a); - nvgpu_mem_wr32(g, cmd->mem, off++, 0); + /* payload_hi : ignored */ + 0x2001001a, + 0, + }; - return off; + nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data)); } void gv11b_sema_add_wait_cmd(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, struct nvgpu_semaphore *s, u64 sema_va) { + u32 data[] = { + /* sema_execute : acq_strict_geq | switch_en | 32bit */ + 0x2001001b, + U32(0x2) | BIT32(12), + }; + nvgpu_log_fn(g, " "); - off = cmd->off + off; - off = gv11b_sema_add_header(g, cmd, off, s, sema_va); - - /* sema_execute : acq_strict_geq | switch_en | 32bit */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b); - nvgpu_mem_wr32(g, cmd->mem, off++, U32(0x2) | BIT32(12)); + gv11b_sema_add_header(g, cmd, s, sema_va); + nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data)); } void gv11b_sema_add_incr_cmd(struct gk20a *g, @@ -82,18 +84,18 @@ void gv11b_sema_add_incr_cmd(struct gk20a *g, struct nvgpu_semaphore *s, u64 sema_va, bool wfi) { - u32 off = cmd->off; + u32 data[] = { + /* sema_execute : release | wfi | 32bit */ + 0x2001001b, + U32(0x1) | ((wfi ? U32(0x1) : U32(0x0)) << 20U), + + /* non_stall_int : payload is ignored */ + 0x20010008, + 0, + }; nvgpu_log_fn(g, " "); - off = gv11b_sema_add_header(g, cmd, off, s, sema_va); - - /* sema_execute : release | wfi | 32bit */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b); - nvgpu_mem_wr32(g, cmd->mem, off++, - U32(0x1) | ((wfi ? U32(0x1) : U32(0x0)) << 20U)); - - /* non_stall_int : payload is ignored */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008); - nvgpu_mem_wr32(g, cmd->mem, off++, 0); + gv11b_sema_add_header(g, cmd, s, sema_va); + nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data)); } diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.h b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.h index d7a1ee56a..7bd8e685b 100644 --- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.h +++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.h @@ -31,7 +31,7 @@ struct nvgpu_semaphore; u32 gv11b_sema_get_wait_cmd_size(void); u32 gv11b_sema_get_incr_cmd_size(void); void gv11b_sema_add_wait_cmd(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, struct nvgpu_semaphore *s, u64 sema_va); void gv11b_sema_add_incr_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c index 07783510a..2ca44cad3 100644 --- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c +++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c @@ -22,29 +22,30 @@ * DEALINGS IN THE SOFTWARE. */ -#include -#include -#include +#include #include #include "syncpt_cmdbuf_gk20a.h" #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT void gk20a_syncpt_add_wait_cmd(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, u32 id, u32 thresh, u64 gpu_va_base) { + u32 data[] = { + /* syncpoint_a */ + 0x2001001CU, + /* payload */ + thresh, + /* syncpoint_b */ + 0x2001001DU, + /* syncpt_id, switch_en, wait */ + (id << 8U) | 0x10U, + }; + nvgpu_log_fn(g, " "); - off = cmd->off + off; - /* syncpoint_a */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001CU); - /* payload */ - nvgpu_mem_wr32(g, cmd->mem, off++, thresh); - /* syncpoint_b */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001DU); - /* syncpt_id, switch_en, wait */ - nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8U) | 0x10U); + nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data)); } u32 gk20a_syncpt_get_wait_cmd_size(void) @@ -61,28 +62,35 @@ void gk20a_syncpt_add_incr_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 id, u64 gpu_va, bool wfi) { - u32 off = cmd->off; + u32 wfi_data[] = { + /* wfi */ + 0x2001001EU, + /* handle, ignored */ + 0x00000000U, + }; + + u32 incr_data[] = { + /* syncpoint_a */ + 0x2001001CU, + /* payload, ignored */ + 0U, + /* syncpoint_b */ + 0x2001001DU, + /* syncpt_id, incr */ + (id << 8U) | 0x1U, + /* syncpoint_b */ + 0x2001001DU, + /* syncpt_id, incr */ + (id << 8U) | 0x1U, + }; nvgpu_log_fn(g, " "); - if (wfi) { - /* wfi */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001EU); - /* handle, ignored */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x00000000U); - } - /* syncpoint_a */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001CU); - /* payload, ignored */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0U); - /* syncpoint_b */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001DU); - /* syncpt_id, incr */ - nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8U) | 0x1U); - /* syncpoint_b */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001DU); - /* syncpt_id, incr */ - nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8U) | 0x1U); + if (wfi) { + nvgpu_priv_cmdbuf_append(g, cmd, wfi_data, + ARRAY_SIZE(wfi_data)); + } + nvgpu_priv_cmdbuf_append(g, cmd, incr_data, ARRAY_SIZE(incr_data)); } u32 gk20a_syncpt_get_incr_cmd_size(bool wfi_cmd) diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h index b4c46ca45..f2bdb756f 100644 --- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h +++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h @@ -32,7 +32,7 @@ struct nvgpu_mem; #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT void gk20a_syncpt_add_wait_cmd(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, u32 id, u32 thresh, u64 gpu_va_base); u32 gk20a_syncpt_get_wait_cmd_size(void); u32 gk20a_syncpt_get_incr_per_release(void); @@ -52,7 +52,7 @@ int gk20a_syncpt_alloc_buf(struct nvgpu_channel *c, #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT static inline void gk20a_syncpt_add_wait_cmd(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, u32 id, u32 thresh, u64 gpu_va_base) { } diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c index 3f9104533..a01e5981f 100644 --- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c +++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c @@ -21,53 +21,45 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include + +#include #include -#include +#include #include "syncpt_cmdbuf_gv11b.h" #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT void gv11b_syncpt_add_wait_cmd(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, u32 id, u32 thresh, u64 gpu_va_base) { u64 gpu_va = gpu_va_base + nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(id); + u32 data[] = { + /* sema_addr_lo */ + 0x20010017, + nvgpu_safe_cast_u64_to_u32(gpu_va & 0xffffffffU), + + /* sema_addr_hi */ + 0x20010018, + nvgpu_safe_cast_u64_to_u32((gpu_va >> 32U) & 0xffU), + + /* payload_lo */ + 0x20010019, + thresh, + + /* payload_hi : ignored */ + 0x2001001a, + 0U, + + /* sema_execute : acq_strict_geq | switch_en | 32bit */ + 0x2001001b, + 0x2U | ((u32)1U << 12U), + }; nvgpu_log_fn(g, " "); - off = cmd->off + off; - - /* sema_addr_lo */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017); - nvgpu_mem_wr32(g, cmd->mem, off++, - nvgpu_safe_cast_u64_to_u32(gpu_va & 0xffffffffU)); - - /* sema_addr_hi */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010018); - nvgpu_mem_wr32(g, cmd->mem, off++, - nvgpu_safe_cast_u64_to_u32((gpu_va >> 32U) & 0xffU)); - - /* payload_lo */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010019); - nvgpu_mem_wr32(g, cmd->mem, off++, thresh); - - /* payload_hi : ignored */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a); - nvgpu_mem_wr32(g, cmd->mem, off++, 0U); - - /* sema_execute : acq_strict_geq | switch_en | 32bit */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b); - nvgpu_mem_wr32(g, cmd->mem, off, 0x2U | ((u32)1U << 12U)); + nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data)); } u32 gv11b_syncpt_get_wait_cmd_size(void) @@ -84,32 +76,31 @@ void gv11b_syncpt_add_incr_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 id, u64 gpu_va, bool wfi) { - u32 off = cmd->off; + u32 data[] = { + /* sema_addr_lo */ + 0x20010017, + nvgpu_safe_cast_u64_to_u32(gpu_va & 0xffffffffU), + + /* sema_addr_hi */ + 0x20010018, + nvgpu_safe_cast_u64_to_u32((gpu_va >> 32U) & 0xffU), + + /* payload_lo */ + 0x20010019, + 0, + + /* payload_hi : ignored */ + 0x2001001a, + 0, + + /* sema_execute : release | wfi | 32bit */ + 0x2001001b, + (0x1U | ((u32)(wfi ? 0x1U : 0x0U) << 20U)), + }; nvgpu_log_fn(g, " "); - /* sema_addr_lo */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017); - nvgpu_mem_wr32(g, cmd->mem, off++, - nvgpu_safe_cast_u64_to_u32(gpu_va & 0xffffffffU)); - - /* sema_addr_hi */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010018); - nvgpu_mem_wr32(g, cmd->mem, off++, - nvgpu_safe_cast_u64_to_u32((gpu_va >> 32U) & 0xffU)); - - /* payload_lo */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010019); - nvgpu_mem_wr32(g, cmd->mem, off++, 0); - - /* payload_hi : ignored */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a); - nvgpu_mem_wr32(g, cmd->mem, off++, 0); - - /* sema_execute : release | wfi | 32bit */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b); - nvgpu_mem_wr32(g, cmd->mem, off, (0x1U | - ((u32)(wfi ? 0x1U : 0x0U) << 20U))); + nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data)); } u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd) diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.h b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.h index dc71f9cfd..ad1ce1497 100644 --- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.h +++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.h @@ -35,7 +35,7 @@ struct vm_gk20a; #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT void gv11b_syncpt_add_wait_cmd(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, u32 id, u32 thresh, u64 gpu_va_base); u32 gv11b_syncpt_get_wait_cmd_size(void); u32 gv11b_syncpt_get_incr_per_release(void); @@ -58,7 +58,7 @@ int gv11b_syncpt_get_sync_ro_map(struct vm_gk20a *vm, #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT static inline void gv11b_syncpt_add_wait_cmd(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, u32 id, u32 thresh, u64 gpu_va_base) { } diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops_sync.h b/drivers/gpu/nvgpu/include/nvgpu/gops_sync.h index 8331bec38..350ad9430 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops_sync.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops_sync.h @@ -76,7 +76,7 @@ struct gops_sync { struct nvgpu_mem *syncpt_buf); #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT void (*add_wait_cmd)(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, u32 id, u32 thresh, u64 gpu_va_base); u32 (*get_wait_cmd_size)(void); void (*add_incr_cmd)(struct gk20a *g, @@ -97,7 +97,7 @@ struct gops_sync { u32 (*get_wait_cmd_size)(void); u32 (*get_incr_cmd_size)(void); void (*add_wait_cmd)(struct gk20a *g, - struct priv_cmd_entry *cmd, u32 off, + struct priv_cmd_entry *cmd, struct nvgpu_semaphore *s, u64 sema_va); void (*add_incr_cmd)(struct gk20a *g, struct priv_cmd_entry *cmd, diff --git a/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h b/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h index 161dea16c..4d9b06d4e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h +++ b/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h @@ -33,6 +33,7 @@ struct priv_cmd_entry { bool valid; struct nvgpu_mem *mem; u32 off; /* offset in mem, in u32 entries */ + u32 fill_off; /* write offset from off, in u32 entries */ u64 gva; u32 get; /* start of entry in queue */ u32 size; /* in words */ @@ -48,4 +49,9 @@ void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c, void nvgpu_channel_update_priv_cmd_q_and_free_entry(struct nvgpu_channel *ch, struct priv_cmd_entry *e); +void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e, + u32 *data, u32 entries); +void nvgpu_priv_cmdbuf_append_zeros(struct gk20a *g, struct priv_cmd_entry *e, + u32 entries); + #endif