gpu: nvgpu: add profiler apis to manage PMA stream

Support a new IOCTL to manage PMA stream metadata by adding the API below:
nvgpu_prof_ioctl_pma_stream_update_get_put()

Add nvgpu_perfbuf_update_get_put() to handle all the updates coming
from userspace and to pass all required information.

Add gops.perf.update_get_put() to handle all HW accesses required in
perf HW unit.

Add gops.perf.bind_mem_bytes_buffer_addr() to bind the available bytes
buffer while binding HWPM streamout.

Bug 2510974
Jira NVGPU-5360

Change-Id: Ibacc2299b845e47776babc081759dfc4afde34fe
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2406484
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2020-08-06 11:34:34 +05:30
committed by Alex Waterman
parent 5844151a93
commit 221475f753
11 changed files with 159 additions and 0 deletions

View File

@@ -112,3 +112,47 @@ void nvgpu_perfbuf_deinit_vm(struct gk20a *g)
g->ops.perfbuf.deinit_inst_block(g); g->ops.perfbuf.deinit_inst_block(g);
nvgpu_vm_put(g->mm.perfbuf.vm); nvgpu_vm_put(g->mm.perfbuf.vm);
} }
/*
 * Forward a PMA stream get/put update to the perf unit and optionally wait
 * for the refreshed "bytes available" value.
 *
 * @g:               GPU driver context.
 * @bytes_consumed:  passed through unchanged to g->ops.perf.update_get_put().
 * @bytes_available: when non-NULL, a bytes-available update is requested; it
 *                   receives the value read back from @cpuva, but only when
 *                   @wait is also true. With @wait false it is left untouched.
 * @cpuva:           CPU mapping of the 32-bit bytes-available word. Must be
 *                   non-NULL whenever @bytes_available is non-NULL.
 * @wait:            poll @cpuva until its value changes or the timeout expires.
 * @put_ptr:         passed through to g->ops.perf.update_get_put().
 * @overflowed:      passed through to g->ops.perf.update_get_put().
 *
 * Returns 0 on success, -EINVAL if an update is requested without a mapped
 * buffer, -ETIMEDOUT if the word at @cpuva never changed, or the error
 * returned by the HAL / nvgpu_timeout_init().
 */
int nvgpu_perfbuf_update_get_put(struct gk20a *g, u64 bytes_consumed,
		u64 *bytes_available, void *cpuva, bool wait,
		u64 *put_ptr, bool *overflowed)
{
	/*
	 * Value stored in the buffer before the update is requested; the wait
	 * loop treats "still equal to this" as "not updated yet".
	 * NOTE(review): presumably the perf unit overwrites the word in
	 * response to the update request - confirm against the HW manual.
	 */
	const u32 pending_marker = 0xffffffffU;
	struct nvgpu_timeout timeout;
	int err;
	bool update_available_bytes = (bytes_available != NULL);
	volatile u32 *available_bytes_va = (volatile u32 *)cpuva;

	/*
	 * The marker write and the poll below both dereference cpuva, so an
	 * update request without a mapped buffer must be refused up front
	 * rather than dereferencing NULL.
	 */
	if (update_available_bytes && (available_bytes_va == NULL)) {
		nvgpu_err(g, "bytes available buffer is not mapped");
		return -EINVAL;
	}

	if (update_available_bytes) {
		*available_bytes_va = pending_marker;
	}

	err = g->ops.perf.update_get_put(g, bytes_consumed,
			update_available_bytes, put_ptr, overflowed);
	if (err != 0) {
		return err;
	}

	if (update_available_bytes && wait) {
		/* NOTE(review): 10000 is presumably milliseconds - confirm. */
		err = nvgpu_timeout_init(g, &timeout, 10000, NVGPU_TIMER_CPU_TIMER);
		if (err != 0) {
			nvgpu_err(g, "nvgpu_timeout_init() failed err=%d", err);
			return err;
		}

		/* Re-check the word every 10 ms until it changes or we time out. */
		do {
			if (*available_bytes_va != pending_marker) {
				break;
			}
			nvgpu_msleep(10);
		} while (nvgpu_timeout_expired(&timeout) == 0);

		if (*available_bytes_va == pending_marker) {
			return -ETIMEDOUT;
		}

		*bytes_available = *available_bytes_va;
	}

	return 0;
}

View File

@@ -345,6 +345,7 @@ static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof
return err; return err;
} }
g->ops.perf.bind_mem_bytes_buffer_addr(g, prof->pma_bytes_available_buffer_va);
return 0; return 0;
} }
@@ -353,6 +354,8 @@ static int nvgpu_profiler_unbind_hwpm_streamout(struct nvgpu_profiler_object *pr
struct gk20a *g = prof->g; struct gk20a *g = prof->g;
int err; int err;
g->ops.perf.bind_mem_bytes_buffer_addr(g, 0ULL);
err = g->ops.perfbuf.perfbuf_disable(g); err = g->ops.perfbuf.perfbuf_disable(g);
if (err) { if (err) {
return err; return err;

View File

@@ -1214,6 +1214,7 @@ static const struct gops_debugger gv11b_ops_debugger = {
static const struct gops_perf gv11b_ops_perf = { static const struct gops_perf gv11b_ops_perf = {
.enable_membuf = gv11b_perf_enable_membuf, .enable_membuf = gv11b_perf_enable_membuf,
.disable_membuf = gv11b_perf_disable_membuf, .disable_membuf = gv11b_perf_disable_membuf,
.bind_mem_bytes_buffer_addr = gv11b_perf_bind_mem_bytes_buffer_addr,
.init_inst_block = gv11b_perf_init_inst_block, .init_inst_block = gv11b_perf_init_inst_block,
.deinit_inst_block = gv11b_perf_deinit_inst_block, .deinit_inst_block = gv11b_perf_deinit_inst_block,
.membuf_reset_streaming = gv11b_perf_membuf_reset_streaming, .membuf_reset_streaming = gv11b_perf_membuf_reset_streaming,
@@ -1223,6 +1224,7 @@ static const struct gops_perf gv11b_ops_perf = {
.get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset, .get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset,
.get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset, .get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset,
.get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset, .get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset,
.update_get_put = gv11b_perf_update_get_put,
}; };
#endif #endif
@@ -1232,6 +1234,7 @@ static const struct gops_perfbuf gv11b_ops_perfbuf = {
.perfbuf_disable = nvgpu_perfbuf_disable_locked, .perfbuf_disable = nvgpu_perfbuf_disable_locked,
.init_inst_block = nvgpu_perfbuf_init_inst_block, .init_inst_block = nvgpu_perfbuf_init_inst_block,
.deinit_inst_block = nvgpu_perfbuf_deinit_inst_block, .deinit_inst_block = nvgpu_perfbuf_deinit_inst_block,
.update_get_put = nvgpu_perfbuf_update_get_put,
}; };
#endif #endif

View File

@@ -1282,6 +1282,7 @@ static const struct gops_debugger tu104_ops_debugger = {
static const struct gops_perf tu104_ops_perf = { static const struct gops_perf tu104_ops_perf = {
.enable_membuf = gv11b_perf_enable_membuf, .enable_membuf = gv11b_perf_enable_membuf,
.disable_membuf = gv11b_perf_disable_membuf, .disable_membuf = gv11b_perf_disable_membuf,
.bind_mem_bytes_buffer_addr = gv11b_perf_bind_mem_bytes_buffer_addr,
.init_inst_block = gv11b_perf_init_inst_block, .init_inst_block = gv11b_perf_init_inst_block,
.deinit_inst_block = gv11b_perf_deinit_inst_block, .deinit_inst_block = gv11b_perf_deinit_inst_block,
.membuf_reset_streaming = gv11b_perf_membuf_reset_streaming, .membuf_reset_streaming = gv11b_perf_membuf_reset_streaming,
@@ -1291,6 +1292,7 @@ static const struct gops_perf tu104_ops_perf = {
.get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset, .get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset,
.get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset, .get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset,
.get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset, .get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset,
.update_get_put = gv11b_perf_update_get_put,
}; };
#endif #endif
@@ -1300,6 +1302,7 @@ static const struct gops_perfbuf tu104_ops_perfbuf = {
.perfbuf_disable = nvgpu_perfbuf_disable_locked, .perfbuf_disable = nvgpu_perfbuf_disable_locked,
.init_inst_block = nvgpu_perfbuf_init_inst_block, .init_inst_block = nvgpu_perfbuf_init_inst_block,
.deinit_inst_block = nvgpu_perfbuf_deinit_inst_block, .deinit_inst_block = nvgpu_perfbuf_deinit_inst_block,
.update_get_put = nvgpu_perfbuf_update_get_put,
}; };
#endif #endif

View File

@@ -88,6 +88,43 @@ void gv11b_perf_disable_membuf(struct gk20a *g)
nvgpu_writel(g, perf_pmasys_outsize_r(), 0); nvgpu_writel(g, perf_pmasys_outsize_r(), 0);
} }
/*
 * Program the PMA "mem bytes" address register from @buf_addr.
 *
 * The address is right-shifted by perf_pmasys_mem_bytes_addr_ptr_b() bits,
 * narrowed to 32 bits via nvgpu_safe_cast_u64_to_u32(), packed with
 * perf_pmasys_mem_bytes_addr_ptr_f() and written to
 * perf_pmasys_mem_bytes_addr_r(). Callers in this change pass the profiler's
 * bytes-available buffer VA when binding and 0 when unbinding.
 */
void gv11b_perf_bind_mem_bytes_buffer_addr(struct gk20a *g, u64 buf_addr)
{
	u64 shifted_addr = buf_addr >> perf_pmasys_mem_bytes_addr_ptr_b();
	u32 ptr_field = nvgpu_safe_cast_u64_to_u32(shifted_addr);
	u32 reg_val = perf_pmasys_mem_bytes_addr_ptr_f(ptr_field);

	nvgpu_writel(g, perf_pmasys_mem_bytes_addr_r(), reg_val);
}
/*
 * Apply a PMA stream get/put update to the perf unit registers.
 *
 * @g:                      GPU driver context.
 * @bytes_consumed:         value written to perf_pmasys_mem_bump_r(); must fit
 *                          in 32 bits.
 * @update_available_bytes: when true, set the update_bytes "doit" field in
 *                          perf_pmasys_control_r() (read-modify-write).
 * @put_ptr:                when non-NULL, receives perf_pmasys_mem_head_r().
 * @overflowed:             when non-NULL, receives the result of
 *                          g->ops.perf.get_membuf_overflow_status().
 *
 * Returns 0 on success, or -EINVAL if @bytes_consumed does not fit in the
 * 32-bit register write.
 */
int gv11b_perf_update_get_put(struct gk20a *g, u64 bytes_consumed,
		bool update_available_bytes, u64 *put_ptr,
		bool *overflowed)
{
	u32 val;

	/*
	 * nvgpu_writel() takes a 32-bit value. Refuse larger counts instead of
	 * silently dropping the upper bits. An error return is used rather
	 * than a safe-cast helper because the count comes from the ioctl
	 * arguments and must not be able to trip an assertion.
	 */
	if ((bytes_consumed >> 32U) != 0ULL) {
		return -EINVAL;
	}

	nvgpu_writel(g, perf_pmasys_mem_bump_r(), (u32)bytes_consumed);

	if (update_available_bytes) {
		val = nvgpu_readl(g, perf_pmasys_control_r());
		val = set_field(val, perf_pmasys_control_update_bytes_m(),
				perf_pmasys_control_update_bytes_doit_f());
		nvgpu_writel(g, perf_pmasys_control_r(), val);
	}

	if (put_ptr != NULL) {
		*put_ptr = (u64)nvgpu_readl(g, perf_pmasys_mem_head_r());
	}

	if (overflowed != NULL) {
		*overflowed = g->ops.perf.get_membuf_overflow_status(g);
	}

	return 0;
}
void gv11b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) void gv11b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{ {
u32 inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block); u32 inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block);

View File

@@ -40,6 +40,11 @@ void gv11b_perf_membuf_reset_streaming(struct gk20a *g);
void gv11b_perf_enable_membuf(struct gk20a *g, u32 size, u64 buf_addr); void gv11b_perf_enable_membuf(struct gk20a *g, u32 size, u64 buf_addr);
void gv11b_perf_disable_membuf(struct gk20a *g); void gv11b_perf_disable_membuf(struct gk20a *g);
void gv11b_perf_bind_mem_bytes_buffer_addr(struct gk20a *g, u64 buf_addr);
int gv11b_perf_update_get_put(struct gk20a *g, u64 bytes_consumed, bool update_available_bytes,
u64 *put_ptr, bool *overflowed);
void gv11b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); void gv11b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
void gv11b_perf_deinit_inst_block(struct gk20a *g); void gv11b_perf_deinit_inst_block(struct gk20a *g);

View File

@@ -46,6 +46,7 @@ struct gops_debugger {
struct gops_perf { struct gops_perf {
void (*enable_membuf)(struct gk20a *g, u32 size, u64 buf_addr); void (*enable_membuf)(struct gk20a *g, u32 size, u64 buf_addr);
void (*disable_membuf)(struct gk20a *g); void (*disable_membuf)(struct gk20a *g);
void (*bind_mem_bytes_buffer_addr)(struct gk20a *g, u64 buf_addr);
void (*init_inst_block)(struct gk20a *g, void (*init_inst_block)(struct gk20a *g,
struct nvgpu_mem *inst_block); struct nvgpu_mem *inst_block);
void (*deinit_inst_block)(struct gk20a *g); void (*deinit_inst_block)(struct gk20a *g);
@@ -57,12 +58,16 @@ struct gops_perf {
u32 (*get_pmmsys_per_chiplet_offset)(void); u32 (*get_pmmsys_per_chiplet_offset)(void);
u32 (*get_pmmgpc_per_chiplet_offset)(void); u32 (*get_pmmgpc_per_chiplet_offset)(void);
u32 (*get_pmmfbp_per_chiplet_offset)(void); u32 (*get_pmmfbp_per_chiplet_offset)(void);
int (*update_get_put)(struct gk20a *g, u64 bytes_consumed,
bool update_available_bytes, u64 *put_ptr, bool *overflowed);
}; };
struct gops_perfbuf { struct gops_perfbuf {
int (*perfbuf_enable)(struct gk20a *g, u64 offset, u32 size); int (*perfbuf_enable)(struct gk20a *g, u64 offset, u32 size);
int (*perfbuf_disable)(struct gk20a *g); int (*perfbuf_disable)(struct gk20a *g);
int (*init_inst_block)(struct gk20a *g); int (*init_inst_block)(struct gk20a *g);
void (*deinit_inst_block)(struct gk20a *g); void (*deinit_inst_block)(struct gk20a *g);
int (*update_get_put)(struct gk20a *g, u64 bytes_consumed, u64 *bytes_available,
void *cpuva, bool wait, u64 *put_ptr, bool *overflowed);
}; };
#endif #endif

View File

@@ -75,6 +75,8 @@
#define perf_pmasys_control_membuf_clear_status_v(r) (((r) >> 5U) & 0x1U) #define perf_pmasys_control_membuf_clear_status_v(r) (((r) >> 5U) & 0x1U)
#define perf_pmasys_control_membuf_clear_status_doit_v() (0x00000001U) #define perf_pmasys_control_membuf_clear_status_doit_v() (0x00000001U)
#define perf_pmasys_control_membuf_clear_status_doit_f() (0x20U) #define perf_pmasys_control_membuf_clear_status_doit_f() (0x20U)
#define perf_pmasys_control_update_bytes_m() (U32(0x1U) << 3U)
#define perf_pmasys_control_update_bytes_doit_f() (0x8U)
#define perf_pmasys_mem_block_r() (0x0024a070U) #define perf_pmasys_mem_block_r() (0x0024a070U)
#define perf_pmasys_mem_block_base_f(v) ((U32(v) & 0xfffffffU) << 0U) #define perf_pmasys_mem_block_base_f(v) ((U32(v) & 0xfffffffU) << 0U)
#define perf_pmasys_mem_block_target_f(v) ((U32(v) & 0x3U) << 28U) #define perf_pmasys_mem_block_target_f(v) ((U32(v) & 0x3U) << 28U)
@@ -101,6 +103,10 @@
#define perf_pmasys_mem_bytes_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U) #define perf_pmasys_mem_bytes_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U)
#define perf_pmasys_mem_bump_r() (0x0024a088U) #define perf_pmasys_mem_bump_r() (0x0024a088U)
#define perf_pmasys_mem_bump_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U) #define perf_pmasys_mem_bump_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U)
#define perf_pmasys_mem_head_r() (0x0024a080U)
#define perf_pmasys_mem_bytes_addr_r() (0x0024a08cU)
#define perf_pmasys_mem_bytes_addr_ptr_f(v) ((U32(v) & 0x3fffffffU) << 2U)
#define perf_pmasys_mem_bytes_addr_ptr_b() (2U)
#define perf_pmasys_enginestatus_r() (0x0024a0a4U) #define perf_pmasys_enginestatus_r() (0x0024a0a4U)
#define perf_pmasys_enginestatus_rbufempty_f(v) ((U32(v) & 0x1U) << 4U) #define perf_pmasys_enginestatus_rbufempty_f(v) ((U32(v) & 0x1U) << 4U)
#define perf_pmasys_enginestatus_rbufempty_empty_v() (0x00000001U) #define perf_pmasys_enginestatus_rbufempty_empty_v() (0x00000001U)

View File

@@ -75,6 +75,8 @@
#define perf_pmasys_control_membuf_clear_status_v(r) (((r) >> 5U) & 0x1U) #define perf_pmasys_control_membuf_clear_status_v(r) (((r) >> 5U) & 0x1U)
#define perf_pmasys_control_membuf_clear_status_doit_v() (0x00000001U) #define perf_pmasys_control_membuf_clear_status_doit_v() (0x00000001U)
#define perf_pmasys_control_membuf_clear_status_doit_f() (0x20U) #define perf_pmasys_control_membuf_clear_status_doit_f() (0x20U)
#define perf_pmasys_control_update_bytes_m() (U32(0x1U) << 3U)
#define perf_pmasys_control_update_bytes_doit_f() (0x8U)
#define perf_pmasys_mem_block_r() (0x0024a070U) #define perf_pmasys_mem_block_r() (0x0024a070U)
#define perf_pmasys_mem_block_base_f(v) ((U32(v) & 0xfffffffU) << 0U) #define perf_pmasys_mem_block_base_f(v) ((U32(v) & 0xfffffffU) << 0U)
#define perf_pmasys_mem_block_target_f(v) ((U32(v) & 0x3U) << 28U) #define perf_pmasys_mem_block_target_f(v) ((U32(v) & 0x3U) << 28U)
@@ -101,6 +103,10 @@
#define perf_pmasys_mem_bytes_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U) #define perf_pmasys_mem_bytes_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U)
#define perf_pmasys_mem_bump_r() (0x0024a088U) #define perf_pmasys_mem_bump_r() (0x0024a088U)
#define perf_pmasys_mem_bump_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U) #define perf_pmasys_mem_bump_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U)
#define perf_pmasys_mem_head_r() (0x0024a080U)
#define perf_pmasys_mem_bytes_addr_r() (0x0024a08cU)
#define perf_pmasys_mem_bytes_addr_ptr_f(v) ((U32(v) & 0x3fffffffU) << 2U)
#define perf_pmasys_mem_bytes_addr_ptr_b() (2U)
#define perf_pmasys_enginestatus_r() (0x0024a0a4U) #define perf_pmasys_enginestatus_r() (0x0024a0a4U)
#define perf_pmasys_enginestatus_rbufempty_f(v) ((U32(v) & 0x1U) << 4U) #define perf_pmasys_enginestatus_rbufempty_f(v) ((U32(v) & 0x1U) << 4U)
#define perf_pmasys_enginestatus_rbufempty_empty_v() (0x00000001U) #define perf_pmasys_enginestatus_rbufempty_empty_v() (0x00000001U)

View File

@@ -38,5 +38,8 @@ void nvgpu_perfbuf_deinit_vm(struct gk20a *g);
int nvgpu_perfbuf_init_inst_block(struct gk20a *g); int nvgpu_perfbuf_init_inst_block(struct gk20a *g);
void nvgpu_perfbuf_deinit_inst_block(struct gk20a *g); void nvgpu_perfbuf_deinit_inst_block(struct gk20a *g);
int nvgpu_perfbuf_update_get_put(struct gk20a *g, u64 bytes_consumed, u64 *bytes_available,
void *cpuva, bool wait, u64 *put_ptr, bool *overflowed);
#endif /* CONFIG_NVGPU_DEBUGGER */ #endif /* CONFIG_NVGPU_DEBUGGER */
#endif #endif

View File

@@ -648,6 +648,45 @@ static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv
return err; return err;
} }
static int nvgpu_prof_ioctl_pma_stream_update_get_put(struct nvgpu_profiler_object *prof,
struct nvgpu_profiler_pma_stream_update_get_put_args *args)
{
bool update_bytes_available = args->flags &
NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_UPDATE_AVAILABLE_BYTES;
bool wait = args->flags &
NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_WAIT_FOR_UPDATE;
bool update_put_ptr = args->flags &
NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_RETURN_PUT_PTR;
struct gk20a *g = prof->g;
bool overflowed;
int err;
nvgpu_log(g, gpu_dbg_prof,
"Update PMA stream request %u: flags = 0x%x bytes_consumed=%llu",
prof->prof_handle, args->flags, args->bytes_consumed);
err = nvgpu_perfbuf_update_get_put(prof->g, args->bytes_consumed,
update_bytes_available ? &args->bytes_available : NULL,
prof->pma_bytes_available_buffer_cpuva, wait,
update_put_ptr ? &args->put_ptr : NULL,
&overflowed);
if (err != 0) {
return err;
}
if (overflowed) {
args->flags |=
NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_OVERFLOW_TRIGGERED;
}
nvgpu_log(g, gpu_dbg_prof,
"Update PMA stream request %u complete: flags = 0x%x"
"bytes_available=%llu put_ptr=%llu",
prof->prof_handle, args->flags, args->bytes_available, args->put_ptr);
return 0;
}
long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd, long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg) unsigned long arg)
{ {
@@ -720,6 +759,11 @@ long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
(struct nvgpu_profiler_exec_reg_ops_args *)buf); (struct nvgpu_profiler_exec_reg_ops_args *)buf);
break; break;
case NVGPU_PROFILER_IOCTL_PMA_STREAM_UPDATE_GET_PUT:
err = nvgpu_prof_ioctl_pma_stream_update_get_put(prof,
(struct nvgpu_profiler_pma_stream_update_get_put_args *)buf);
break;
default: default:
nvgpu_err(g, "unrecognized profiler ioctl cmd: 0x%x", cmd); nvgpu_err(g, "unrecognized profiler ioctl cmd: 0x%x", cmd);
err = -ENOTTY; err = -ENOTTY;