gpu: nvgpu: hide priv cmdbuf mem writes

Add an API to append data to a priv cmdbuf entry. Hold the write pointer
offset internally in the entry instead of having the user keep track of
where those words are written to.

This helps in eventually hiding struct priv_cmd_entry from users and
provides a more consistent interface in general. The wait and incr
commands are now slightly easier to read as well when they're just
arrays of data.

A syncfd-backed prefence may be composed of several individual fences.
Some of those (or even the only one, if the prefence is backed by just
one) may have already expired. The current syncfd export design releases
and nulls semaphores when they expire (see gk20a_sync_pt_has_signaled()),
so for those the wait cmdbuf is appended with zeros instead of a wait
command; nvgpu_priv_cmdbuf_append_zeros() exists for this purpose.

Jira NVGPU-4548

Change-Id: I1057f98c1b5b407460aa6e1dcba917da9c9aa9c9
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2325099
(cherry picked from commit 6a00a65a86d8249cfeb06a05682abb4771949f19)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2331336
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Hölttä
2020-04-03 11:04:02 +03:00
committed by Alex Waterman
parent 0c9f589f3f
commit 39844fb27c
13 changed files with 217 additions and 200 deletions

View File

@@ -170,6 +170,7 @@ int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
return -EAGAIN;
}
e->fill_off = 0;
e->size = orig_size;
e->mem = &q->mem;
@@ -237,3 +238,21 @@ void nvgpu_channel_update_priv_cmd_q_and_free_entry(
nvgpu_channel_free_priv_cmd_entry(ch, e);
}
/*
 * Append entries u32 words from data to the priv cmdbuf entry e.
 *
 * The words are written into e->mem starting at word offset
 * e->off + e->fill_off, and e->fill_off is then advanced by entries so that
 * the next append continues right after this one.
 *
 * Asserts that the appended words fit within the e->size words of the entry.
 */
void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e,
		u32 *data, u32 entries)
{
	/*
	 * Check against the remaining space rather than computing
	 * fill_off + entries: that sum is u32 and could wrap around, letting
	 * an oversized append slip past the assert.
	 */
	nvgpu_assert(e->fill_off <= e->size);
	nvgpu_assert(entries <= e->size - e->fill_off);

	/* off and fill_off are in u32 words; the mem accessor takes bytes. */
	nvgpu_mem_wr_n(g, e->mem, (e->off + e->fill_off) * sizeof(u32),
			data, entries * sizeof(u32));
	e->fill_off += entries;
}
/*
 * Append entries zero-valued u32 words to the priv cmdbuf entry e.
 *
 * The zeros are written into e->mem starting at word offset
 * e->off + e->fill_off, and e->fill_off is then advanced by entries so that
 * the next append continues right after this one.
 *
 * Asserts that the appended words fit within the e->size words of the entry.
 */
void nvgpu_priv_cmdbuf_append_zeros(struct gk20a *g, struct priv_cmd_entry *e,
		u32 entries)
{
	/*
	 * Check against the remaining space rather than computing
	 * fill_off + entries: that sum is u32 and could wrap around, letting
	 * an oversized append slip past the assert.
	 */
	nvgpu_assert(e->fill_off <= e->size);
	nvgpu_assert(entries <= e->size - e->fill_off);

	/* off and fill_off are in u32 words; the mem accessor takes bytes. */
	nvgpu_memset(g, e->mem, (e->off + e->fill_off) * sizeof(u32),
			0, entries * sizeof(u32));
	e->fill_off += entries;
}

View File

@@ -57,8 +57,7 @@ nvgpu_channel_sync_semaphore_from_base(struct nvgpu_channel_sync *base)
}
static void add_sema_wait_cmd(struct gk20a *g, struct nvgpu_channel *c,
struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd,
u32 offset)
struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd)
{
int ch = c->chid;
u64 va;
@@ -66,12 +65,12 @@ static void add_sema_wait_cmd(struct gk20a *g, struct nvgpu_channel *c,
/* acquire just needs to read the mem. */
va = nvgpu_semaphore_gpu_ro_va(s);
g->ops.sync.sema.add_wait_cmd(g, cmd, offset, s, va);
g->ops.sync.sema.add_wait_cmd(g, cmd, s, va);
gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3llu"
"va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
"va=0x%llx cmd_mem=0x%llx b=0x%llx",
ch, nvgpu_semaphore_get_value(s),
nvgpu_semaphore_get_hw_pool_page_idx(s),
va, cmd->gva, cmd->mem->gpu_va, offset);
va, cmd->gva, cmd->mem->gpu_va);
}
static void add_sema_incr_cmd(struct gk20a *g, struct nvgpu_channel *c,
@@ -98,20 +97,17 @@ static void add_sema_incr_cmd(struct gk20a *g, struct nvgpu_channel *c,
static void channel_sync_semaphore_gen_wait_cmd(struct nvgpu_channel *c,
struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd,
u32 wait_cmd_size, u32 pos)
u32 wait_cmd_size)
{
bool has_incremented;
if (sema == NULL) {
/* expired */
nvgpu_memset(c->g, wait_cmd->mem,
(wait_cmd->off + pos * wait_cmd_size) * (u32)sizeof(u32),
0, wait_cmd_size * (u32)sizeof(u32));
/* came from an expired sync fence */
nvgpu_priv_cmdbuf_append_zeros(c->g, wait_cmd, wait_cmd_size);
} else {
has_incremented = nvgpu_semaphore_can_wait(sema);
nvgpu_assert(has_incremented);
add_sema_wait_cmd(c->g, c, sema, wait_cmd,
pos * wait_cmd_size);
add_sema_wait_cmd(c->g, c, sema, wait_cmd);
nvgpu_semaphore_put(sema);
}
}
@@ -163,7 +159,7 @@ static int channel_sync_semaphore_wait_fd(
nvgpu_os_fence_sema_extract_nth_semaphore(
&os_fence_sema, i, &semaphore);
channel_sync_semaphore_gen_wait_cmd(c, semaphore, entry,
wait_cmd_size, i);
wait_cmd_size);
}
cleanup:

View File

@@ -58,7 +58,7 @@ nvgpu_channel_sync_syncpt_from_base(struct nvgpu_channel_sync *base)
static int channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c,
u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
u32 wait_cmd_size, u32 pos, bool preallocated)
u32 wait_cmd_size, bool preallocated)
{
int err = 0;
@@ -73,9 +73,8 @@ static int channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c,
}
nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
id, c->vm->syncpt_ro_map_gpu_va);
c->g->ops.sync.syncpt.add_wait_cmd(c->g, wait_cmd,
pos * wait_cmd_size, id, thresh,
c->vm->syncpt_ro_map_gpu_va);
c->g->ops.sync.syncpt.add_wait_cmd(c->g, wait_cmd, id, thresh,
c->vm->syncpt_ro_map_gpu_va);
return 0;
}
@@ -92,7 +91,7 @@ static int channel_sync_syncpt_wait_raw(struct nvgpu_channel_sync_syncpt *s,
}
err = channel_sync_syncpt_gen_wait_cmd(c, id, thresh,
wait_cmd, wait_cmd_size, 0, false);
wait_cmd, wait_cmd_size, false);
return err;
}
@@ -154,7 +153,7 @@ static int channel_sync_syncpt_wait_fd(struct nvgpu_channel_sync *s, int fd,
nvgpu_os_fence_syncpt_extract_nth_syncpt(
&os_fence_syncpt, i, &syncpt_id, &syncpt_thresh);
err = channel_sync_syncpt_gen_wait_cmd(c, syncpt_id,
syncpt_thresh, wait_cmd, wait_cmd_size, i, true);
syncpt_thresh, wait_cmd, wait_cmd_size, true);
}
cleanup:
@@ -384,5 +383,3 @@ nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c)
return &sp->base;
}

View File

@@ -21,11 +21,9 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/priv_cmdbuf.h>
#include "sema_cmdbuf_gk20a.h"
@@ -40,66 +38,66 @@ u32 gk20a_sema_get_incr_cmd_size(void)
return 10U;
}
static u32 gk20a_sema_add_header(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
u64 sema_va)
static void gk20a_sema_add_header(struct gk20a *g,
struct priv_cmd_entry *cmd, u64 sema_va)
{
/* semaphore_a */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004U);
/* offset_upper */
nvgpu_mem_wr32(g, cmd->mem, off++, (u32)(sema_va >> 32) & 0xffU);
/* semaphore_b */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005U);
/* offset */
nvgpu_mem_wr32(g, cmd->mem, off++, (u32)sema_va & 0xffffffff);
u32 data[] = {
/* semaphore_a */
0x20010004U,
/* offset_upper */
(u32)(sema_va >> 32) & 0xffU,
/* semaphore_b */
0x20010005U,
/* offset */
(u32)sema_va & 0xffffffff,
};
return off;
nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data));
}
void gk20a_sema_add_wait_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
struct nvgpu_semaphore *s, u64 sema_va)
{
u32 data[] = {
/* semaphore_c */
0x20010006U,
/* payload */
nvgpu_semaphore_get_value(s),
/* semaphore_d */
0x20010007U,
/* operation: acq_geq, switch_en */
0x4U | BIT32(12),
};
nvgpu_log_fn(g, " ");
off = cmd->off + off;
off = gk20a_sema_add_header(g, cmd, off, sema_va);
/* semaphore_c */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
/* payload */
nvgpu_mem_wr32(g, cmd->mem, off++,
nvgpu_semaphore_get_value(s));
/* semaphore_d */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
/* operation: acq_geq, switch_en */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x4U | BIT32(12));
gk20a_sema_add_header(g, cmd, sema_va);
nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data));
}
void gk20a_sema_add_incr_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd,
struct nvgpu_semaphore *s, u64 sema_va,
bool wfi)
{
u32 off = cmd->off;
u32 data[] = {
/* semaphore_c */
0x20010006U,
/* payload */
nvgpu_semaphore_get_value(s),
/* semaphore_d */
0x20010007U,
/* operation: release, wfi */
0x2UL | ((wfi ? 0x0UL : 0x1UL) << 20),
/* non_stall_int */
0x20010008U,
/* ignored */
0U,
};
nvgpu_log_fn(g, " ");
off = gk20a_sema_add_header(g, cmd, off, sema_va);
/* semaphore_c */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
/* payload */
nvgpu_mem_wr32(g, cmd->mem, off++,
nvgpu_semaphore_get_value(s));
/* semaphore_d */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
/* operation: release, wfi */
nvgpu_mem_wr32(g, cmd->mem, off++,
0x2UL | ((wfi ? 0x0UL : 0x1UL) << 20));
/* non_stall_int */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008U);
/* ignored */
nvgpu_mem_wr32(g, cmd->mem, off++, 0U);
gk20a_sema_add_header(g, cmd, sema_va);
nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data));
}

View File

@@ -31,7 +31,7 @@ struct nvgpu_semaphore;
u32 gk20a_sema_get_wait_cmd_size(void);
u32 gk20a_sema_get_incr_cmd_size(void);
void gk20a_sema_add_wait_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
struct nvgpu_semaphore *s, u64 sema_va);
void gk20a_sema_add_incr_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd,

View File

@@ -22,10 +22,8 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/log.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/priv_cmdbuf.h>
#include "sema_cmdbuf_gv11b.h"
@@ -40,41 +38,45 @@ u32 gv11b_sema_get_incr_cmd_size(void)
return 12U;
}
static u32 gv11b_sema_add_header(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
static void gv11b_sema_add_header(struct gk20a *g,
struct priv_cmd_entry *cmd,
struct nvgpu_semaphore *s, u64 sema_va)
{
/* sema_addr_lo */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017);
nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffffULL);
u32 data[] = {
/* sema_addr_lo */
0x20010017,
sema_va & 0xffffffffULL,
/* sema_addr_hi */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010018);
nvgpu_mem_wr32(g, cmd->mem, off++, (sema_va >> 32ULL) & 0xffULL);
/* sema_addr_hi */
0x20010018,
(sema_va >> 32ULL) & 0xffULL,
/* payload_lo */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010019);
nvgpu_mem_wr32(g, cmd->mem, off++, nvgpu_semaphore_get_value(s));
/* payload_lo */
0x20010019,
nvgpu_semaphore_get_value(s),
/* payload_hi : ignored */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a);
nvgpu_mem_wr32(g, cmd->mem, off++, 0);
/* payload_hi : ignored */
0x2001001a,
0,
};
return off;
nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data));
}
void gv11b_sema_add_wait_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
struct nvgpu_semaphore *s, u64 sema_va)
{
u32 data[] = {
/* sema_execute : acq_strict_geq | switch_en | 32bit */
0x2001001b,
U32(0x2) | BIT32(12),
};
nvgpu_log_fn(g, " ");
off = cmd->off + off;
off = gv11b_sema_add_header(g, cmd, off, s, sema_va);
/* sema_execute : acq_strict_geq | switch_en | 32bit */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
nvgpu_mem_wr32(g, cmd->mem, off++, U32(0x2) | BIT32(12));
gv11b_sema_add_header(g, cmd, s, sema_va);
nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data));
}
void gv11b_sema_add_incr_cmd(struct gk20a *g,
@@ -82,18 +84,18 @@ void gv11b_sema_add_incr_cmd(struct gk20a *g,
struct nvgpu_semaphore *s, u64 sema_va,
bool wfi)
{
u32 off = cmd->off;
u32 data[] = {
/* sema_execute : release | wfi | 32bit */
0x2001001b,
U32(0x1) | ((wfi ? U32(0x1) : U32(0x0)) << 20U),
/* non_stall_int : payload is ignored */
0x20010008,
0,
};
nvgpu_log_fn(g, " ");
off = gv11b_sema_add_header(g, cmd, off, s, sema_va);
/* sema_execute : release | wfi | 32bit */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
nvgpu_mem_wr32(g, cmd->mem, off++,
U32(0x1) | ((wfi ? U32(0x1) : U32(0x0)) << 20U));
/* non_stall_int : payload is ignored */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
nvgpu_mem_wr32(g, cmd->mem, off++, 0);
gv11b_sema_add_header(g, cmd, s, sema_va);
nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data));
}

View File

@@ -31,7 +31,7 @@ struct nvgpu_semaphore;
u32 gv11b_sema_get_wait_cmd_size(void);
u32 gv11b_sema_get_incr_cmd_size(void);
void gv11b_sema_add_wait_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
struct nvgpu_semaphore *s, u64 sema_va);
void gv11b_sema_add_incr_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd,

View File

@@ -22,29 +22,30 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/log.h>
#include <nvgpu/priv_cmdbuf.h>
#include "syncpt_cmdbuf_gk20a.h"
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
u32 id, u32 thresh, u64 gpu_va_base)
{
u32 data[] = {
/* syncpoint_a */
0x2001001CU,
/* payload */
thresh,
/* syncpoint_b */
0x2001001DU,
/* syncpt_id, switch_en, wait */
(id << 8U) | 0x10U,
};
nvgpu_log_fn(g, " ");
off = cmd->off + off;
/* syncpoint_a */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001CU);
/* payload */
nvgpu_mem_wr32(g, cmd->mem, off++, thresh);
/* syncpoint_b */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001DU);
/* syncpt_id, switch_en, wait */
nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8U) | 0x10U);
nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data));
}
u32 gk20a_syncpt_get_wait_cmd_size(void)
@@ -61,28 +62,35 @@ void gk20a_syncpt_add_incr_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd,
u32 id, u64 gpu_va, bool wfi)
{
u32 off = cmd->off;
u32 wfi_data[] = {
/* wfi */
0x2001001EU,
/* handle, ignored */
0x00000000U,
};
u32 incr_data[] = {
/* syncpoint_a */
0x2001001CU,
/* payload, ignored */
0U,
/* syncpoint_b */
0x2001001DU,
/* syncpt_id, incr */
(id << 8U) | 0x1U,
/* syncpoint_b */
0x2001001DU,
/* syncpt_id, incr */
(id << 8U) | 0x1U,
};
nvgpu_log_fn(g, " ");
if (wfi) {
/* wfi */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001EU);
/* handle, ignored */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x00000000U);
}
/* syncpoint_a */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001CU);
/* payload, ignored */
nvgpu_mem_wr32(g, cmd->mem, off++, 0U);
/* syncpoint_b */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001DU);
/* syncpt_id, incr */
nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8U) | 0x1U);
/* syncpoint_b */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001DU);
/* syncpt_id, incr */
nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8U) | 0x1U);
if (wfi) {
nvgpu_priv_cmdbuf_append(g, cmd, wfi_data,
ARRAY_SIZE(wfi_data));
}
nvgpu_priv_cmdbuf_append(g, cmd, incr_data, ARRAY_SIZE(incr_data));
}
u32 gk20a_syncpt_get_incr_cmd_size(bool wfi_cmd)

View File

@@ -32,7 +32,7 @@ struct nvgpu_mem;
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
u32 id, u32 thresh, u64 gpu_va_base);
u32 gk20a_syncpt_get_wait_cmd_size(void);
u32 gk20a_syncpt_get_incr_per_release(void);
@@ -52,7 +52,7 @@ int gk20a_syncpt_alloc_buf(struct nvgpu_channel *c,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
static inline void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
u32 id, u32 thresh, u64 gpu_va_base)
{
}

View File

@@ -21,53 +21,45 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/mm.h>
#include <nvgpu/vm.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/dma.h>
#include <nvgpu/lock.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/priv_cmdbuf.h>
#include <nvgpu/log.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/priv_cmdbuf.h>
#include "syncpt_cmdbuf_gv11b.h"
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
void gv11b_syncpt_add_wait_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
u32 id, u32 thresh, u64 gpu_va_base)
{
u64 gpu_va = gpu_va_base +
nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(id);
u32 data[] = {
/* sema_addr_lo */
0x20010017,
nvgpu_safe_cast_u64_to_u32(gpu_va & 0xffffffffU),
/* sema_addr_hi */
0x20010018,
nvgpu_safe_cast_u64_to_u32((gpu_va >> 32U) & 0xffU),
/* payload_lo */
0x20010019,
thresh,
/* payload_hi : ignored */
0x2001001a,
0U,
/* sema_execute : acq_strict_geq | switch_en | 32bit */
0x2001001b,
0x2U | ((u32)1U << 12U),
};
nvgpu_log_fn(g, " ");
off = cmd->off + off;
/* sema_addr_lo */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017);
nvgpu_mem_wr32(g, cmd->mem, off++,
nvgpu_safe_cast_u64_to_u32(gpu_va & 0xffffffffU));
/* sema_addr_hi */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010018);
nvgpu_mem_wr32(g, cmd->mem, off++,
nvgpu_safe_cast_u64_to_u32((gpu_va >> 32U) & 0xffU));
/* payload_lo */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010019);
nvgpu_mem_wr32(g, cmd->mem, off++, thresh);
/* payload_hi : ignored */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a);
nvgpu_mem_wr32(g, cmd->mem, off++, 0U);
/* sema_execute : acq_strict_geq | switch_en | 32bit */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
nvgpu_mem_wr32(g, cmd->mem, off, 0x2U | ((u32)1U << 12U));
nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data));
}
u32 gv11b_syncpt_get_wait_cmd_size(void)
@@ -84,32 +76,31 @@ void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd,
u32 id, u64 gpu_va, bool wfi)
{
u32 off = cmd->off;
u32 data[] = {
/* sema_addr_lo */
0x20010017,
nvgpu_safe_cast_u64_to_u32(gpu_va & 0xffffffffU),
/* sema_addr_hi */
0x20010018,
nvgpu_safe_cast_u64_to_u32((gpu_va >> 32U) & 0xffU),
/* payload_lo */
0x20010019,
0,
/* payload_hi : ignored */
0x2001001a,
0,
/* sema_execute : release | wfi | 32bit */
0x2001001b,
(0x1U | ((u32)(wfi ? 0x1U : 0x0U) << 20U)),
};
nvgpu_log_fn(g, " ");
/* sema_addr_lo */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017);
nvgpu_mem_wr32(g, cmd->mem, off++,
nvgpu_safe_cast_u64_to_u32(gpu_va & 0xffffffffU));
/* sema_addr_hi */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010018);
nvgpu_mem_wr32(g, cmd->mem, off++,
nvgpu_safe_cast_u64_to_u32((gpu_va >> 32U) & 0xffU));
/* payload_lo */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010019);
nvgpu_mem_wr32(g, cmd->mem, off++, 0);
/* payload_hi : ignored */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a);
nvgpu_mem_wr32(g, cmd->mem, off++, 0);
/* sema_execute : release | wfi | 32bit */
nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
nvgpu_mem_wr32(g, cmd->mem, off, (0x1U |
((u32)(wfi ? 0x1U : 0x0U) << 20U)));
nvgpu_priv_cmdbuf_append(g, cmd, data, ARRAY_SIZE(data));
}
u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd)

View File

@@ -35,7 +35,7 @@ struct vm_gk20a;
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
void gv11b_syncpt_add_wait_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
u32 id, u32 thresh, u64 gpu_va_base);
u32 gv11b_syncpt_get_wait_cmd_size(void);
u32 gv11b_syncpt_get_incr_per_release(void);
@@ -58,7 +58,7 @@ int gv11b_syncpt_get_sync_ro_map(struct vm_gk20a *vm,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
static inline void gv11b_syncpt_add_wait_cmd(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
u32 id, u32 thresh, u64 gpu_va_base)
{
}

View File

@@ -76,7 +76,7 @@ struct gops_sync {
struct nvgpu_mem *syncpt_buf);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
void (*add_wait_cmd)(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
u32 id, u32 thresh, u64 gpu_va_base);
u32 (*get_wait_cmd_size)(void);
void (*add_incr_cmd)(struct gk20a *g,
@@ -97,7 +97,7 @@ struct gops_sync {
u32 (*get_wait_cmd_size)(void);
u32 (*get_incr_cmd_size)(void);
void (*add_wait_cmd)(struct gk20a *g,
struct priv_cmd_entry *cmd, u32 off,
struct priv_cmd_entry *cmd,
struct nvgpu_semaphore *s, u64 sema_va);
void (*add_incr_cmd)(struct gk20a *g,
struct priv_cmd_entry *cmd,

View File

@@ -33,6 +33,7 @@ struct priv_cmd_entry {
bool valid;
struct nvgpu_mem *mem;
u32 off; /* offset in mem, in u32 entries */
u32 fill_off; /* write offset from off, in u32 entries */
u64 gva;
u32 get; /* start of entry in queue */
u32 size; /* in words */
@@ -48,4 +49,9 @@ void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c,
void nvgpu_channel_update_priv_cmd_q_and_free_entry(struct nvgpu_channel *ch,
struct priv_cmd_entry *e);
/*
 * Append entries u32 words from data to the priv cmdbuf entry e. The entry
 * keeps its own write offset (fill_off), so successive appends land one
 * after another. Asserts that the entry has room for the appended words.
 */
void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e,
u32 *data, u32 entries);
/*
 * Append entries zero-valued u32 words to the priv cmdbuf entry e, advancing
 * the entry's write offset (fill_off) by the same amount. Asserts that the
 * entry has room for the appended words.
 */
void nvgpu_priv_cmdbuf_append_zeros(struct gk20a *g, struct priv_cmd_entry *e,
u32 entries);
#endif