diff --git a/drivers/gpu/nvgpu/common/perf/perfbuf.c b/drivers/gpu/nvgpu/common/perf/perfbuf.c
index 20e77f51c..b2a4d7637 100644
--- a/drivers/gpu/nvgpu/common/perf/perfbuf.c
+++ b/drivers/gpu/nvgpu/common/perf/perfbuf.c
@@ -112,3 +112,77 @@ void nvgpu_perfbuf_deinit_vm(struct gk20a *g)
 	g->ops.perfbuf.deinit_inst_block(g);
 	nvgpu_vm_put(g->mm.perfbuf.vm);
 }
+
+/*
+ * Report bytes_consumed to the chip HAL and optionally fetch the PMA
+ * bytes-available value.
+ *
+ * bytes_available - when non-NULL, the HAL is asked to refresh the 32-bit
+ *                   value in the buffer at cpuva.
+ * cpuva           - CPU mapping of that buffer; must be non-NULL whenever
+ *                   bytes_available is non-NULL.
+ * wait            - when set together with bytes_available, poll the buffer
+ *                   until it changes and copy the value to *bytes_available.
+ *                   Without wait, *bytes_available is left untouched.
+ * put_ptr,
+ * overflowed      - forwarded to the HAL unchanged.
+ *
+ * Returns 0 on success, -EINVAL for a missing buffer or a bytes_consumed
+ * value wider than 32 bits, -ETIMEDOUT when the buffer is not refreshed
+ * before the timeout expires, or the error from the HAL or timer setup.
+ */
+int nvgpu_perfbuf_update_get_put(struct gk20a *g, u64 bytes_consumed,
+		u64 *bytes_available, void *cpuva, bool wait,
+		u64 *put_ptr, bool *overflowed)
+{
+	struct nvgpu_timeout timeout;
+	int err;
+	bool update_available_bytes = (bytes_available != NULL);
+	volatile u32 *available_bytes_va = (volatile u32 *)cpuva;
+
+	/* The HAL programs this value into a 32-bit register. */
+	if ((bytes_consumed >> 32U) != 0ULL) {
+		nvgpu_err(g, "bytes_consumed does not fit in 32 bits");
+		return -EINVAL;
+	}
+
+	if (update_available_bytes) {
+		if (available_bytes_va == NULL) {
+			nvgpu_err(g, "bytes available buffer is not mapped");
+			return -EINVAL;
+		}
+
+		/* Sentinel: any other value means the buffer was refreshed. */
+		*available_bytes_va = 0xffffffffU;
+	}
+
+	err = g->ops.perf.update_get_put(g, bytes_consumed,
+			update_available_bytes, put_ptr, overflowed);
+	if (err != 0) {
+		return err;
+	}
+
+	if (update_available_bytes && wait) {
+		err = nvgpu_timeout_init(g, &timeout, 10000, NVGPU_TIMER_CPU_TIMER);
+		if (err != 0) {
+			nvgpu_err(g, "nvgpu_timeout_init() failed err=%d", err);
+			return err;
+		}
+
+		do {
+			if (*available_bytes_va != 0xffffffffU) {
+				break;
+			}
+
+			nvgpu_msleep(10);
+		} while (nvgpu_timeout_expired(&timeout) == 0);
+
+		if (*available_bytes_va == 0xffffffffU) {
+			return -ETIMEDOUT;
+		}
+
+		*bytes_available = *available_bytes_va;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c
index 92332e04f..4186b4dd5 100644
--- a/drivers/gpu/nvgpu/common/profiler/profiler.c
+++ b/drivers/gpu/nvgpu/common/profiler/profiler.c
@@ -345,6 +345,7 @@ static int nvgpu_profiler_bind_hwpm_streamout(struct nvgpu_profiler_object *prof
 		return err;
 	}
 
+	g->ops.perf.bind_mem_bytes_buffer_addr(g, prof->pma_bytes_available_buffer_va);
 	return 0;
 }
 
@@ -353,6 +354,8 @@ static int nvgpu_profiler_unbind_hwpm_streamout(struct nvgpu_profiler_object *pr
 	struct gk20a *g = prof->g;
 	int err;
 
+	g->ops.perf.bind_mem_bytes_buffer_addr(g, 0ULL);
+
 	err = g->ops.perfbuf.perfbuf_disable(g);
 	if (err) {
 		return err;
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index db83bc771..7c4da0d72 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -1214,6 +1214,7 @@ static const struct gops_debugger gv11b_ops_debugger = {
 static const struct gops_perf gv11b_ops_perf = {
 	.enable_membuf = gv11b_perf_enable_membuf,
 	.disable_membuf = gv11b_perf_disable_membuf,
+	.bind_mem_bytes_buffer_addr = gv11b_perf_bind_mem_bytes_buffer_addr,
 	.init_inst_block = gv11b_perf_init_inst_block,
 	.deinit_inst_block = gv11b_perf_deinit_inst_block,
 	.membuf_reset_streaming = gv11b_perf_membuf_reset_streaming,
@@ -1223,6 +1224,7 @@ static const struct gops_perf gv11b_ops_perf = {
 	.get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset,
 	.get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset,
 	.get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset,
+	.update_get_put = gv11b_perf_update_get_put,
 };
 #endif
 
@@ -1232,6 +1234,7 @@ static const struct gops_perfbuf gv11b_ops_perfbuf = {
 	.perfbuf_disable = nvgpu_perfbuf_disable_locked,
 	.init_inst_block = nvgpu_perfbuf_init_inst_block,
 	.deinit_inst_block = nvgpu_perfbuf_deinit_inst_block,
+	.update_get_put = nvgpu_perfbuf_update_get_put,
 };
 #endif
 
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index 4c3187a0c..d2f532952 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -1282,6 +1282,7 @@ static const struct gops_debugger tu104_ops_debugger = {
 static const struct gops_perf tu104_ops_perf = {
 	.enable_membuf = gv11b_perf_enable_membuf,
 	.disable_membuf = gv11b_perf_disable_membuf,
+	.bind_mem_bytes_buffer_addr = gv11b_perf_bind_mem_bytes_buffer_addr,
 	.init_inst_block = gv11b_perf_init_inst_block,
 	.deinit_inst_block = gv11b_perf_deinit_inst_block,
 	.membuf_reset_streaming = gv11b_perf_membuf_reset_streaming,
@@ -1291,6 +1292,7 @@ static const struct gops_perf tu104_ops_perf = {
 	.get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset,
 	.get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset,
 	.get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset,
+	.update_get_put = gv11b_perf_update_get_put,
 };
 #endif
 
@@ -1300,6 +1302,7 @@ static const struct gops_perfbuf tu104_ops_perfbuf = {
 	.perfbuf_disable = nvgpu_perfbuf_disable_locked,
 	.init_inst_block = nvgpu_perfbuf_init_inst_block,
 	.deinit_inst_block = nvgpu_perfbuf_deinit_inst_block,
+	.update_get_put = nvgpu_perfbuf_update_get_put,
 };
 #endif
 
diff --git a/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c b/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c
index 435ada63c..3946970da 100644
--- a/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/perf/perf_gv11b.c
@@ -88,6 +88,60 @@ void gv11b_perf_disable_membuf(struct gk20a *g)
 	nvgpu_writel(g, perf_pmasys_outsize_r(), 0);
 }
 
+/*
+ * Program the PMA MEM_BYTES_ADDR register with buf_addr, expressed in units
+ * of (1 << perf_pmasys_mem_bytes_addr_ptr_b()) bytes.
+ */
+void gv11b_perf_bind_mem_bytes_buffer_addr(struct gk20a *g, u64 buf_addr)
+{
+	u32 addr_lo;
+
+	buf_addr = buf_addr >> perf_pmasys_mem_bytes_addr_ptr_b();
+	addr_lo = nvgpu_safe_cast_u64_to_u32(buf_addr);
+
+	/*
+	 * NOTE(review): ptr_f() keeps only 30 bits, so an address at or above
+	 * 4 GB is truncated here without any error - confirm callers never
+	 * pass one.
+	 */
+	nvgpu_writel(g, perf_pmasys_mem_bytes_addr_r(),
+		perf_pmasys_mem_bytes_addr_ptr_f(addr_lo));
+}
+
+/*
+ * Write bytes_consumed to MEM_BUMP, optionally set the UPDATE_BYTES trigger
+ * in the PMA control register, and return the MEM_HEAD register value and
+ * the membuf overflow status through put_ptr / overflowed when those
+ * pointers are non-NULL. Always returns 0.
+ */
+int gv11b_perf_update_get_put(struct gk20a *g, u64 bytes_consumed,
+		bool update_available_bytes, u64 *put_ptr,
+		bool *overflowed)
+{
+	u32 val;
+
+	/* MEM_BUMP is a 32-bit register; narrow explicitly, not silently. */
+	nvgpu_writel(g, perf_pmasys_mem_bump_r(),
+		nvgpu_safe_cast_u64_to_u32(bytes_consumed));
+
+	if (update_available_bytes) {
+		val = nvgpu_readl(g, perf_pmasys_control_r());
+		val = set_field(val, perf_pmasys_control_update_bytes_m(),
+				perf_pmasys_control_update_bytes_doit_f());
+		nvgpu_writel(g, perf_pmasys_control_r(), val);
+	}
+
+	if (put_ptr != NULL) {
+		*put_ptr = (u64)nvgpu_readl(g, perf_pmasys_mem_head_r());
+	}
+
+	if (overflowed != NULL) {
+		*overflowed = g->ops.perf.get_membuf_overflow_status(g);
+	}
+
+	return 0;
+}
+
 void gv11b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
 {
 	u32 inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block);
diff --git a/drivers/gpu/nvgpu/hal/perf/perf_gv11b.h b/drivers/gpu/nvgpu/hal/perf/perf_gv11b.h
index 419b3678a..b98ceae06 100644
--- a/drivers/gpu/nvgpu/hal/perf/perf_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/perf/perf_gv11b.h
@@ -40,6 +40,11 @@ void gv11b_perf_membuf_reset_streaming(struct gk20a *g);
 void gv11b_perf_enable_membuf(struct gk20a *g, u32 size, u64 buf_addr);
 void gv11b_perf_disable_membuf(struct gk20a *g);
 
+void gv11b_perf_bind_mem_bytes_buffer_addr(struct gk20a *g, u64 buf_addr);
+
+int gv11b_perf_update_get_put(struct gk20a *g, u64 bytes_consumed, bool update_available_bytes,
+		u64 *put_ptr, bool *overflowed);
+
 void gv11b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
 void gv11b_perf_deinit_inst_block(struct gk20a *g);
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h b/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h
index 2dd63402f..e58cd9ebb 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h
@@ -46,6 +46,7 @@ struct gops_debugger {
 struct gops_perf {
 	void (*enable_membuf)(struct gk20a *g, u32 size, u64 buf_addr);
 	void (*disable_membuf)(struct gk20a *g);
+	void (*bind_mem_bytes_buffer_addr)(struct gk20a *g, u64 buf_addr);
 	void (*init_inst_block)(struct gk20a *g,
 			 struct nvgpu_mem *inst_block);
 	void (*deinit_inst_block)(struct gk20a *g);
@@ -57,12 +58,16 @@ struct gops_perf {
 	u32 (*get_pmmsys_per_chiplet_offset)(void);
 	u32 (*get_pmmgpc_per_chiplet_offset)(void);
 	u32 (*get_pmmfbp_per_chiplet_offset)(void);
+	int (*update_get_put)(struct gk20a *g, u64 bytes_consumed,
+			bool update_available_bytes, u64 *put_ptr, bool *overflowed);
 };
 struct gops_perfbuf {
 	int (*perfbuf_enable)(struct gk20a *g, u64 offset, u32 size);
 	int (*perfbuf_disable)(struct gk20a *g);
 	int (*init_inst_block)(struct gk20a *g);
 	void (*deinit_inst_block)(struct gk20a *g);
+	int (*update_get_put)(struct gk20a *g, u64 bytes_consumed, u64 *bytes_available,
+			void *cpuva, bool wait, u64 *put_ptr, bool *overflowed);
 };
 #endif
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h
index 6753f6c2d..49333af22 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_perf_gv11b.h
@@ -75,6 +75,8 @@
 #define perf_pmasys_control_membuf_clear_status_v(r)        (((r) >> 5U) & 0x1U)
 #define perf_pmasys_control_membuf_clear_status_doit_v()           (0x00000001U)
 #define perf_pmasys_control_membuf_clear_status_doit_f()                 (0x20U)
+#define perf_pmasys_control_update_bytes_m()                   (U32(0x1U) << 3U)
+#define perf_pmasys_control_update_bytes_doit_f()                         (0x8U)
 #define perf_pmasys_mem_block_r()                                  (0x0024a070U)
 #define perf_pmasys_mem_block_base_f(v)            ((U32(v) & 0xfffffffU) << 0U)
 #define perf_pmasys_mem_block_target_f(v)               ((U32(v) & 0x3U) << 28U)
@@ -101,6 +103,10 @@
 #define perf_pmasys_mem_bytes_numbytes_f(v)        ((U32(v) & 0xfffffffU) << 4U)
 #define perf_pmasys_mem_bump_r()                                   (0x0024a088U)
 #define perf_pmasys_mem_bump_numbytes_f(v)         ((U32(v) & 0xfffffffU) << 4U)
+#define perf_pmasys_mem_head_r()                                   (0x0024a080U)
+#define perf_pmasys_mem_bytes_addr_r()                             (0x0024a08cU)
+#define perf_pmasys_mem_bytes_addr_ptr_f(v)       ((U32(v) & 0x3fffffffU) << 2U)
+#define perf_pmasys_mem_bytes_addr_ptr_b()                                  (2U)
 #define perf_pmasys_enginestatus_r()                               (0x0024a0a4U)
 #define perf_pmasys_enginestatus_rbufempty_f(v)          ((U32(v) & 0x1U) << 4U)
 #define perf_pmasys_enginestatus_rbufempty_empty_v()               (0x00000001U)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_perf_tu104.h b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_perf_tu104.h
index 38ba92e30..0f9ea8bc1 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_perf_tu104.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_perf_tu104.h
@@ -75,6 +75,8 @@
 #define perf_pmasys_control_membuf_clear_status_v(r)        (((r) >> 5U) & 0x1U)
 #define perf_pmasys_control_membuf_clear_status_doit_v()           (0x00000001U)
 #define perf_pmasys_control_membuf_clear_status_doit_f()                 (0x20U)
+#define perf_pmasys_control_update_bytes_m()                   (U32(0x1U) << 3U)
+#define perf_pmasys_control_update_bytes_doit_f()                         (0x8U)
 #define perf_pmasys_mem_block_r()                                  (0x0024a070U)
 #define perf_pmasys_mem_block_base_f(v)            ((U32(v) & 0xfffffffU) << 0U)
 #define perf_pmasys_mem_block_target_f(v)               ((U32(v) & 0x3U) << 28U)
@@ -101,6 +103,10 @@
 #define perf_pmasys_mem_bytes_numbytes_f(v)        ((U32(v) & 0xfffffffU) << 4U)
 #define perf_pmasys_mem_bump_r()                                   (0x0024a088U)
 #define perf_pmasys_mem_bump_numbytes_f(v)         ((U32(v) & 0xfffffffU) << 4U)
+#define perf_pmasys_mem_head_r()                                   (0x0024a080U)
+#define perf_pmasys_mem_bytes_addr_r()                             (0x0024a08cU)
+#define perf_pmasys_mem_bytes_addr_ptr_f(v)       ((U32(v) & 0x3fffffffU) << 2U)
+#define perf_pmasys_mem_bytes_addr_ptr_b()                                  (2U)
 #define perf_pmasys_enginestatus_r()                               (0x0024a0a4U)
 #define perf_pmasys_enginestatus_rbufempty_f(v)          ((U32(v) & 0x1U) << 4U)
 #define perf_pmasys_enginestatus_rbufempty_empty_v()               (0x00000001U)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/perfbuf.h b/drivers/gpu/nvgpu/include/nvgpu/perfbuf.h
index ddd27399f..8ad13fc39 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/perfbuf.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/perfbuf.h
@@ -38,5 +38,8 @@ void nvgpu_perfbuf_deinit_vm(struct gk20a *g);
 int nvgpu_perfbuf_init_inst_block(struct gk20a *g);
 void nvgpu_perfbuf_deinit_inst_block(struct gk20a *g);
 
+int nvgpu_perfbuf_update_get_put(struct gk20a *g, u64 bytes_consumed, u64 *bytes_available,
+		void *cpuva, bool wait, u64 *put_ptr, bool *overflowed);
+
 #endif /* CONFIG_NVGPU_DEBUGGER */
 #endif
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
index 6810d28d7..96f6fde13 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
@@ -648,6 +648,51 @@ static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv
 	return err;
 }
 
+/*
+ * Handle NVGPU_PROFILER_IOCTL_PMA_STREAM_UPDATE_GET_PUT: decode the request
+ * flags, forward the request to nvgpu_perfbuf_update_get_put() and set the
+ * OVERFLOW_TRIGGERED flag in args->flags when an overflow is reported.
+ */
+static int nvgpu_prof_ioctl_pma_stream_update_get_put(struct nvgpu_profiler_object *prof,
+		struct nvgpu_profiler_pma_stream_update_get_put_args *args)
+{
+	bool update_bytes_available = args->flags &
+		NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_UPDATE_AVAILABLE_BYTES;
+	bool wait = args->flags &
+		NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_WAIT_FOR_UPDATE;
+	bool update_put_ptr = args->flags &
+		NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_RETURN_PUT_PTR;
+	struct gk20a *g = prof->g;
+	/* Stay well-defined even if the callee never writes the flag. */
+	bool overflowed = false;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_prof,
+		"Update PMA stream request %u: flags = 0x%x bytes_consumed=%llu",
+		prof->prof_handle, args->flags, args->bytes_consumed);
+
+	err = nvgpu_perfbuf_update_get_put(g, args->bytes_consumed,
+			update_bytes_available ? &args->bytes_available : NULL,
+			prof->pma_bytes_available_buffer_cpuva, wait,
+			update_put_ptr ? &args->put_ptr : NULL,
+			&overflowed);
+	if (err != 0) {
+		return err;
+	}
+
+	if (overflowed) {
+		args->flags |=
+			NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_OVERFLOW_TRIGGERED;
+	}
+
+	nvgpu_log(g, gpu_dbg_prof,
+		"Update PMA stream request %u complete: flags = 0x%x "
+		"bytes_available=%llu put_ptr=%llu",
+		prof->prof_handle, args->flags, args->bytes_available, args->put_ptr);
+
+	return 0;
+}
+
 long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -720,6 +765,11 @@ long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
 			(struct nvgpu_profiler_exec_reg_ops_args *)buf);
 		break;
 
+	case NVGPU_PROFILER_IOCTL_PMA_STREAM_UPDATE_GET_PUT:
+		err = nvgpu_prof_ioctl_pma_stream_update_get_put(prof,
+			(struct nvgpu_profiler_pma_stream_update_get_put_args *)buf);
+		break;
+
 	default:
 		nvgpu_err(g, "unrecognized profiler ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;