mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: add profiler apis to manage PMA stream
Support a new IOCTL to manage PMA stream metadata by adding the handler
nvgpu_prof_ioctl_pma_stream_update_get_put().

Add nvgpu_perfbuf_update_get_put() to handle all the updates coming from
userspace and to pass all required information.

Add gops.perf.update_get_put() to handle all HW accesses required in the
perf HW unit.

Add gops.perf.bind_mem_bytes_buffer_addr() to bind the available-bytes
buffer while binding HWPM streamout.

Bug 2510974
Jira NVGPU-5360

Change-Id: Ibacc2299b845e47776babc081759dfc4afde34fe
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2406484
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
committed by Alex Waterman
parent 5844151a93
commit 221475f753
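For orientation, below is a minimal userspace sketch of how the new IOCTL could be driven. The IOCTL name, the argument struct, and the flag names are taken from this change; the uapi header path, the helper name, and the way the profiler object fd is obtained are assumptions for illustration only.

/*
 * Hypothetical usage sketch (not part of this change): report bytes_consumed
 * back to the PMA stream and request a refreshed available-bytes count plus
 * the current PUT pointer.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu-profiler.h>	/* assumed uapi header location */

static int pma_stream_update(int prof_fd, uint64_t bytes_consumed,
		uint64_t *bytes_available, uint64_t *put_ptr, int *overflowed)
{
	struct nvgpu_profiler_pma_stream_update_get_put_args args;

	memset(&args, 0, sizeof(args));
	args.bytes_consumed = bytes_consumed;
	/* Ask for an available-bytes update, wait for it, and return PUT. */
	args.flags = NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_UPDATE_AVAILABLE_BYTES |
		NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_WAIT_FOR_UPDATE |
		NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_RETURN_PUT_PTR;

	if (ioctl(prof_fd, NVGPU_PROFILER_IOCTL_PMA_STREAM_UPDATE_GET_PUT, &args) != 0) {
		return -1;
	}

	/* The kernel sets this flag when the membuf overflowed. */
	*overflowed = (args.flags &
		NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_OVERFLOW_TRIGGERED) != 0;
	*bytes_available = args.bytes_available;
	*put_ptr = args.put_ptr;
	return 0;
}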
@@ -112,3 +112,47 @@ void nvgpu_perfbuf_deinit_vm(struct gk20a *g)
 	g->ops.perfbuf.deinit_inst_block(g);
 	nvgpu_vm_put(g->mm.perfbuf.vm);
 }
+
+int nvgpu_perfbuf_update_get_put(struct gk20a *g, u64 bytes_consumed,
+		u64 *bytes_available, void *cpuva, bool wait,
+		u64 *put_ptr, bool *overflowed)
+{
+	struct nvgpu_timeout timeout;
+	int err;
+	bool update_available_bytes = (bytes_available == NULL) ? false : true;
+	volatile u32 *available_bytes_va = (u32 *)cpuva;
+
+	if (update_available_bytes) {
+		*available_bytes_va = 0xffffffff;
+	}
+
+	err = g->ops.perf.update_get_put(g, bytes_consumed,
+			update_available_bytes, put_ptr, overflowed);
+	if (err != 0) {
+		return err;
+	}
+
+	if (update_available_bytes && wait) {
+		err = nvgpu_timeout_init(g, &timeout, 10000, NVGPU_TIMER_CPU_TIMER);
+		if (err != 0) {
+			nvgpu_err(g, "nvgpu_timeout_init() failed err=%d", err);
+			return err;
+		}
+
+		do {
+			if (*available_bytes_va != 0xffffffff) {
+				break;
+			}
+
+			nvgpu_msleep(10);
+		} while (nvgpu_timeout_expired(&timeout) == 0);
+
+		if (*available_bytes_va == 0xffffffff) {
+			return -ETIMEDOUT;
+		}
+
+		*bytes_available = *available_bytes_va;
+	}
+
+	return 0;
+}
@@ -345,6 +345,7 @@ static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof
 		return err;
 	}
 
+	g->ops.perf.bind_mem_bytes_buffer_addr(g, prof->pma_bytes_available_buffer_va);
 	return 0;
 }
 
@@ -353,6 +354,8 @@ static int nvgpu_profiler_unbind_hwpm_streamout(struct nvgpu_profiler_object *pr
 	struct gk20a *g = prof->g;
 	int err;
 
+	g->ops.perf.bind_mem_bytes_buffer_addr(g, 0ULL);
+
 	err = g->ops.perfbuf.perfbuf_disable(g);
 	if (err) {
 		return err;
@@ -1214,6 +1214,7 @@ static const struct gops_debugger gv11b_ops_debugger = {
 static const struct gops_perf gv11b_ops_perf = {
 	.enable_membuf = gv11b_perf_enable_membuf,
 	.disable_membuf = gv11b_perf_disable_membuf,
+	.bind_mem_bytes_buffer_addr = gv11b_perf_bind_mem_bytes_buffer_addr,
 	.init_inst_block = gv11b_perf_init_inst_block,
 	.deinit_inst_block = gv11b_perf_deinit_inst_block,
 	.membuf_reset_streaming = gv11b_perf_membuf_reset_streaming,
@@ -1223,6 +1224,7 @@ static const struct gops_perf gv11b_ops_perf = {
 	.get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset,
 	.get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset,
 	.get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset,
+	.update_get_put = gv11b_perf_update_get_put,
 };
 #endif
 
@@ -1232,6 +1234,7 @@ static const struct gops_perfbuf gv11b_ops_perfbuf = {
 	.perfbuf_disable = nvgpu_perfbuf_disable_locked,
 	.init_inst_block = nvgpu_perfbuf_init_inst_block,
 	.deinit_inst_block = nvgpu_perfbuf_deinit_inst_block,
+	.update_get_put = nvgpu_perfbuf_update_get_put,
 };
 #endif
 
@@ -1282,6 +1282,7 @@ static const struct gops_debugger tu104_ops_debugger = {
 static const struct gops_perf tu104_ops_perf = {
 	.enable_membuf = gv11b_perf_enable_membuf,
 	.disable_membuf = gv11b_perf_disable_membuf,
+	.bind_mem_bytes_buffer_addr = gv11b_perf_bind_mem_bytes_buffer_addr,
 	.init_inst_block = gv11b_perf_init_inst_block,
 	.deinit_inst_block = gv11b_perf_deinit_inst_block,
 	.membuf_reset_streaming = gv11b_perf_membuf_reset_streaming,
@@ -1291,6 +1292,7 @@ static const struct gops_perf tu104_ops_perf = {
 	.get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset,
 	.get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset,
 	.get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset,
+	.update_get_put = gv11b_perf_update_get_put,
 };
 #endif
 
@@ -1300,6 +1302,7 @@ static const struct gops_perfbuf tu104_ops_perfbuf = {
 	.perfbuf_disable = nvgpu_perfbuf_disable_locked,
 	.init_inst_block = nvgpu_perfbuf_init_inst_block,
 	.deinit_inst_block = nvgpu_perfbuf_deinit_inst_block,
+	.update_get_put = nvgpu_perfbuf_update_get_put,
 };
 #endif
 
@@ -88,6 +88,43 @@ void gv11b_perf_disable_membuf(struct gk20a *g)
 	nvgpu_writel(g, perf_pmasys_outsize_r(), 0);
 }
 
+void gv11b_perf_bind_mem_bytes_buffer_addr(struct gk20a *g, u64 buf_addr)
+{
+	u32 addr_lo;
+
+	buf_addr = buf_addr >> perf_pmasys_mem_bytes_addr_ptr_b();
+	addr_lo = nvgpu_safe_cast_u64_to_u32(buf_addr);
+
+	nvgpu_writel(g, perf_pmasys_mem_bytes_addr_r(),
+		perf_pmasys_mem_bytes_addr_ptr_f(addr_lo));
+}
+
+int gv11b_perf_update_get_put(struct gk20a *g, u64 bytes_consumed,
+		bool update_available_bytes, u64 *put_ptr,
+		bool *overflowed)
+{
+	u32 val;
+
+	nvgpu_writel(g, perf_pmasys_mem_bump_r(), bytes_consumed);
+
+	if (update_available_bytes) {
+		val = nvgpu_readl(g, perf_pmasys_control_r());
+		val = set_field(val, perf_pmasys_control_update_bytes_m(),
+			perf_pmasys_control_update_bytes_doit_f());
+		nvgpu_writel(g, perf_pmasys_control_r(), val);
+	}
+
+	if (put_ptr) {
+		*put_ptr = (u64)nvgpu_readl(g, perf_pmasys_mem_head_r());
+	}
+
+	if (overflowed) {
+		*overflowed = g->ops.perf.get_membuf_overflow_status(g);
+	}
+
+	return 0;
+}
+
 void gv11b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
 {
 	u32 inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block);
@@ -40,6 +40,11 @@ void gv11b_perf_membuf_reset_streaming(struct gk20a *g);
 void gv11b_perf_enable_membuf(struct gk20a *g, u32 size, u64 buf_addr);
 void gv11b_perf_disable_membuf(struct gk20a *g);
 
+void gv11b_perf_bind_mem_bytes_buffer_addr(struct gk20a *g, u64 buf_addr);
+
+int gv11b_perf_update_get_put(struct gk20a *g, u64 bytes_consumed, bool update_available_bytes,
+	u64 *put_ptr, bool *overflowed);
+
 void gv11b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
 void gv11b_perf_deinit_inst_block(struct gk20a *g);
 
@@ -46,6 +46,7 @@ struct gops_debugger {
 struct gops_perf {
 	void (*enable_membuf)(struct gk20a *g, u32 size, u64 buf_addr);
 	void (*disable_membuf)(struct gk20a *g);
+	void (*bind_mem_bytes_buffer_addr)(struct gk20a *g, u64 buf_addr);
 	void (*init_inst_block)(struct gk20a *g,
 			struct nvgpu_mem *inst_block);
 	void (*deinit_inst_block)(struct gk20a *g);
@@ -57,12 +58,16 @@ struct gops_perf {
 	u32 (*get_pmmsys_per_chiplet_offset)(void);
 	u32 (*get_pmmgpc_per_chiplet_offset)(void);
 	u32 (*get_pmmfbp_per_chiplet_offset)(void);
+	int (*update_get_put)(struct gk20a *g, u64 bytes_consumed,
+		bool update_available_bytes, u64 *put_ptr, bool *overflowed);
 };
 struct gops_perfbuf {
 	int (*perfbuf_enable)(struct gk20a *g, u64 offset, u32 size);
 	int (*perfbuf_disable)(struct gk20a *g);
 	int (*init_inst_block)(struct gk20a *g);
 	void (*deinit_inst_block)(struct gk20a *g);
+	int (*update_get_put)(struct gk20a *g, u64 bytes_consumed, u64 *bytes_available,
+		void *cpuva, bool wait, u64 *put_ptr, bool *overflowed);
 };
 #endif
 
@@ -75,6 +75,8 @@
 #define perf_pmasys_control_membuf_clear_status_v(r) (((r) >> 5U) & 0x1U)
 #define perf_pmasys_control_membuf_clear_status_doit_v() (0x00000001U)
 #define perf_pmasys_control_membuf_clear_status_doit_f() (0x20U)
+#define perf_pmasys_control_update_bytes_m() (U32(0x1U) << 3U)
+#define perf_pmasys_control_update_bytes_doit_f() (0x8U)
 #define perf_pmasys_mem_block_r() (0x0024a070U)
 #define perf_pmasys_mem_block_base_f(v) ((U32(v) & 0xfffffffU) << 0U)
 #define perf_pmasys_mem_block_target_f(v) ((U32(v) & 0x3U) << 28U)
@@ -101,6 +103,10 @@
 #define perf_pmasys_mem_bytes_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U)
 #define perf_pmasys_mem_bump_r() (0x0024a088U)
 #define perf_pmasys_mem_bump_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U)
+#define perf_pmasys_mem_head_r() (0x0024a080U)
+#define perf_pmasys_mem_bytes_addr_r() (0x0024a08cU)
+#define perf_pmasys_mem_bytes_addr_ptr_f(v) ((U32(v) & 0x3fffffffU) << 2U)
+#define perf_pmasys_mem_bytes_addr_ptr_b() (2U)
 #define perf_pmasys_enginestatus_r() (0x0024a0a4U)
 #define perf_pmasys_enginestatus_rbufempty_f(v) ((U32(v) & 0x1U) << 4U)
 #define perf_pmasys_enginestatus_rbufempty_empty_v() (0x00000001U)
@@ -75,6 +75,8 @@
 #define perf_pmasys_control_membuf_clear_status_v(r) (((r) >> 5U) & 0x1U)
 #define perf_pmasys_control_membuf_clear_status_doit_v() (0x00000001U)
 #define perf_pmasys_control_membuf_clear_status_doit_f() (0x20U)
+#define perf_pmasys_control_update_bytes_m() (U32(0x1U) << 3U)
+#define perf_pmasys_control_update_bytes_doit_f() (0x8U)
 #define perf_pmasys_mem_block_r() (0x0024a070U)
 #define perf_pmasys_mem_block_base_f(v) ((U32(v) & 0xfffffffU) << 0U)
 #define perf_pmasys_mem_block_target_f(v) ((U32(v) & 0x3U) << 28U)
@@ -101,6 +103,10 @@
 #define perf_pmasys_mem_bytes_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U)
 #define perf_pmasys_mem_bump_r() (0x0024a088U)
 #define perf_pmasys_mem_bump_numbytes_f(v) ((U32(v) & 0xfffffffU) << 4U)
+#define perf_pmasys_mem_head_r() (0x0024a080U)
+#define perf_pmasys_mem_bytes_addr_r() (0x0024a08cU)
+#define perf_pmasys_mem_bytes_addr_ptr_f(v) ((U32(v) & 0x3fffffffU) << 2U)
+#define perf_pmasys_mem_bytes_addr_ptr_b() (2U)
 #define perf_pmasys_enginestatus_r() (0x0024a0a4U)
 #define perf_pmasys_enginestatus_rbufempty_f(v) ((U32(v) & 0x1U) << 4U)
 #define perf_pmasys_enginestatus_rbufempty_empty_v() (0x00000001U)
@@ -38,5 +38,8 @@ void nvgpu_perfbuf_deinit_vm(struct gk20a *g);
 int nvgpu_perfbuf_init_inst_block(struct gk20a *g);
 void nvgpu_perfbuf_deinit_inst_block(struct gk20a *g);
 
+int nvgpu_perfbuf_update_get_put(struct gk20a *g, u64 bytes_consumed, u64 *bytes_available,
+	void *cpuva, bool wait, u64 *put_ptr, bool *overflowed);
+
 #endif /* CONFIG_NVGPU_DEBUGGER */
 #endif
@@ -648,6 +648,45 @@ static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv
 	return err;
 }
 
+static int nvgpu_prof_ioctl_pma_stream_update_get_put(struct nvgpu_profiler_object *prof,
+		struct nvgpu_profiler_pma_stream_update_get_put_args *args)
+{
+	bool update_bytes_available = args->flags &
+		NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_UPDATE_AVAILABLE_BYTES;
+	bool wait = args->flags &
+		NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_WAIT_FOR_UPDATE;
+	bool update_put_ptr = args->flags &
+		NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_RETURN_PUT_PTR;
+	struct gk20a *g = prof->g;
+	bool overflowed;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_prof,
+		"Update PMA stream request %u: flags = 0x%x bytes_consumed=%llu",
+		prof->prof_handle, args->flags, args->bytes_consumed);
+
+	err = nvgpu_perfbuf_update_get_put(prof->g, args->bytes_consumed,
+			update_bytes_available ? &args->bytes_available : NULL,
+			prof->pma_bytes_available_buffer_cpuva, wait,
+			update_put_ptr ? &args->put_ptr : NULL,
+			&overflowed);
+	if (err != 0) {
+		return err;
+	}
+
+	if (overflowed) {
+		args->flags |=
+			NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_OVERFLOW_TRIGGERED;
+	}
+
+	nvgpu_log(g, gpu_dbg_prof,
+		"Update PMA stream request %u complete: flags = 0x%x"
+		"bytes_available=%llu put_ptr=%llu",
+		prof->prof_handle, args->flags, args->bytes_available, args->put_ptr);
+
+	return 0;
+}
+
 long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -720,6 +759,11 @@ long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
 			(struct nvgpu_profiler_exec_reg_ops_args *)buf);
 		break;
 
+	case NVGPU_PROFILER_IOCTL_PMA_STREAM_UPDATE_GET_PUT:
+		err = nvgpu_prof_ioctl_pma_stream_update_get_put(prof,
+			(struct nvgpu_profiler_pma_stream_update_get_put_args *)buf);
+		break;
+
 	default:
 		nvgpu_err(g, "unrecognized profiler ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;