gpu: nvgpu: vgpu: perfbuffer support

Add vgpu support for ModeE perfbuffers

- VM allocation is handled by the kernel, with the final mapping
  handled by the RM server
- Enabling/disabling the perfbuffer is handled by the RM server

Bug 1880196
JIRA EVLR-1074

Change-Id: Ifbeb5ede6b07e2e112b930c602c22b66a58ac920
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: https://git-master/r/1506747
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit f7e37e6847
parent cadd5120d3
Author:     Peter Daifuku
AuthorDate: 2017-06-21 16:44:55 -07:00
Commit:     mobile promotions

4 changed files with 94 additions and 37 deletions
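
The change is mechanical but easy to lose in the hunks below: perfbuffer enable/disable moves behind the dbg_session_ops HAL, so the native and virtualized builds install different backends while the common ioctl path stays the same. A minimal sketch of the resulting dispatch, using only names that appear in this diff (the surrounding init plumbing is assumed):

    /* native gk20a (gk20a_init_dbg_session_ops): program the PMA registers */
    gops->dbg_session_ops.perfbuffer_enable  = gk20a_perfbuf_enable_locked;
    gops->dbg_session_ops.perfbuffer_disable = gk20a_perfbuf_disable_locked;

    /* vGPU (vgpu_init_dbg_session_ops): forward the request to the RM server */
    gops->dbg_session_ops.perfbuffer_enable  = vgpu_perfbuffer_enable;
    gops->dbg_session_ops.perfbuffer_disable = vgpu_perfbuffer_disable;

    /* common code (gk20a_perfbuf_map) now calls through the op */
    err = g->ops.dbg_session_ops.perfbuffer_enable(g, args->offset, virt_size);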


@@ -1834,6 +1834,51 @@ static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
 	return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle);
 }
 
+static int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
+{
+	struct mm_gk20a *mm = &g->mm;
+	u32 virt_addr_lo;
+	u32 virt_addr_hi;
+	u32 inst_pa_page;
+	int err;
+
+	err = gk20a_busy(g);
+	if (err) {
+		nvgpu_err(g, "failed to poweron");
+		return err;
+	}
+
+	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
+	if (err)
+		return err;
+	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
+
+	virt_addr_lo = u64_lo32(offset);
+	virt_addr_hi = u64_hi32(offset);
+
+	/* address and size are aligned to 32 bytes, the lowest bits read back
+	 * as zeros */
+	gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
+	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
+			perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
+	gk20a_writel(g, perf_pmasys_outsize_r(), size);
+
+	/* this field is aligned to 4K */
+	inst_pa_page = gk20a_mm_inst_block_addr(g,
+			&mm->perfbuf.inst_block) >> 12;
+
+	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
+	 * should be written last */
+	gk20a_writel(g, perf_pmasys_mem_block_r(),
+			perf_pmasys_mem_block_base_f(inst_pa_page) |
+			perf_pmasys_mem_block_valid_true_f() |
+			perf_pmasys_mem_block_target_lfb_f());
+
+	gk20a_idle(g);
+	return 0;
+}
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
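
A note on the arithmetic in the function added above: the 64-bit buffer GPU VA is split across the 32-bit OUTBASE/OUTBASEUPPER register pair, and MEM_BLOCK takes the instance block's physical address as a 4K page index, hence the >> 12. A self-contained sketch of the same splits with illustrative values (nvgpu's u64_lo32()/u64_hi32() helpers perform the masking and shifting shown here):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* 32-byte-aligned GPU VA; the low 5 bits of OUTBASE read back as 0 */
            uint64_t offset = 0x123456780ULL;
            uint32_t outbase      = (uint32_t)offset;         /* u64_lo32() */
            uint32_t outbaseupper = (uint32_t)(offset >> 32); /* u64_hi32() */

            /* 4K-aligned instance block physical address */
            uint64_t inst_pa = 0xabcd000ULL;
            uint32_t inst_pa_page = (uint32_t)(inst_pa >> 12);

            /* prints: 23456780 00000001 0000abcd */
            printf("%08x %08x %08x\n", outbase, outbaseupper, inst_pa_page);
            return 0;
    }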
@@ -1841,9 +1886,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	struct mm_gk20a *mm = &g->mm;
 	int err;
 	u32 virt_size;
-	u32 virt_addr_lo;
-	u32 virt_addr_hi;
-	u32 inst_pa_page;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
@@ -1863,12 +1905,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		return -ENOMEM;
 	}
 
-	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
-	if (err)
-		goto err_remove_vm;
-
-	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
-
 	err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
 			args->dmabuf_fd,
 			&args->offset,
@@ -1882,38 +1918,15 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	/* perf output buffer may not cross a 4GB boundary */
 	virt_size = u64_lo32(args->mapping_size);
-	virt_addr_lo = u64_lo32(args->offset);
-	virt_addr_hi = u64_hi32(args->offset);
 	if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) {
 		err = -EINVAL;
 		goto err_unmap;
 	}
 
-	err = gk20a_busy(g);
-	if (err) {
-		nvgpu_err(g, "failed to poweron");
+	err = g->ops.dbg_session_ops.perfbuffer_enable(g,
+			args->offset, virt_size);
+	if (err)
 		goto err_unmap;
-	}
-
-	/* address and size are aligned to 32 bytes, the lowest bits read back
-	 * as zeros */
-	gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
-	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
-			perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
-	gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
-
-	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mm_inst_block_addr(g,
-			&mm->perfbuf.inst_block) >> 12;
-
-	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
-	 * should be written last */
-	gk20a_writel(g, perf_pmasys_mem_block_r(),
-			perf_pmasys_mem_block_base_f(inst_pa_page) |
-			perf_pmasys_mem_block_valid_true_f() |
-			perf_pmasys_mem_block_target_lfb_f());
-
-	gk20a_idle(g);
 
 	g->perfbuf.owner = dbg_s;
 	g->perfbuf.offset = args->offset;
@@ -1924,7 +1937,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 err_unmap:
 	nvgpu_vm_unmap_buffer(mm->perfbuf.vm, args->offset, NULL);
 err_remove_vm:
-	gk20a_free_inst_block(g, &mm->perfbuf.inst_block);
 	nvgpu_vm_put(mm->perfbuf.vm);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
@@ -1960,7 +1972,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 	struct vm_gk20a *vm = mm->perfbuf.vm;
 	int err;
 
-	err = gk20a_perfbuf_disable_locked(g);
+	err = g->ops.dbg_session_ops.perfbuffer_disable(g);
 
 	nvgpu_vm_unmap_buffer(vm, offset, NULL);
 	gk20a_free_inst_block(g, &mm->perfbuf.inst_block);
@@ -2001,4 +2013,6 @@ void gk20a_init_dbg_session_ops(struct gpu_ops *gops)
 		nvgpu_check_and_set_context_reservation;
 	gops->dbg_session_ops.release_profiler_reservation =
 		nvgpu_release_profiler_reservation;
+	gops->dbg_session_ops.perfbuffer_enable = gk20a_perfbuf_enable_locked;
+	gops->dbg_session_ops.perfbuffer_disable = gk20a_perfbuf_disable_locked;
 };
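
The 4GB-boundary check retained in gk20a_perfbuf_map() is load-bearing for the new op: OUTBASEUPPER holds a single high word for the entire buffer, so a buffer whose first and one-past-last addresses differ in their upper 32 bits cannot be described by one outbase/outsize pair. A worked example with illustrative values:

    u64 offset = 0xfffff000ULL;  /* u64_hi32(offset) == 0x0 */
    u32 virt_size = 0x2000;      /* offset + virt_size == 0x100001000,
                                  * so u64_hi32() of the end == 0x1 */
    if (u64_hi32(offset) != u64_hi32(offset + virt_size))
            return -EINVAL;      /* crosses a 4GB boundary: rejected */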


@@ -869,6 +869,8 @@ struct gpu_ops {
 		void (*release_profiler_reservation)(
 				struct dbg_session_gk20a *dbg_s,
 				struct dbg_profiler_object_data *prof_obj);
+		int (*perfbuffer_enable)(struct gk20a *g, u64 offset, u32 size);
+		int (*perfbuffer_disable)(struct gk20a *g);
 	} dbg_session_ops;
 	struct {
 		void (*get_program_numbers)(struct gk20a *g,
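
Both backends in this patch install the two new ops unconditionally, so the callers above dispatch without a guard. A gpu_ops variant that left them unset (hypothetical; no such variant exists in this diff) would need the usual NULL check first:

    /* hypothetical defensive call site, not present in this change */
    if (g->ops.dbg_session_ops.perfbuffer_enable == NULL)
            return -ENOSYS;
    err = g->ops.dbg_session_ops.perfbuffer_enable(g, offset, size);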


@@ -178,6 +178,37 @@ static void vgpu_release_profiler_reservation(
 	vgpu_sendrecv_prof_cmd(dbg_s, TEGRA_VGPU_PROF_RELEASE);
 }
 
+static int vgpu_sendrecv_perfbuf_cmd(struct gk20a *g, u64 offset, u32 size)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = mm->perfbuf.vm;
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_perfbuf_mgt_params *p =
+			&msg.params.perfbuf_management;
+	int err;
+
+	msg.cmd = TEGRA_VGPU_CMD_PERFBUF_MGT;
+	msg.handle = vgpu_get_handle(g);
+
+	p->vm_handle = vm->handle;
+	p->offset = offset;
+	p->size = size;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	return err;
+}
+
+static int vgpu_perfbuffer_enable(struct gk20a *g, u64 offset, u32 size)
+{
+	return vgpu_sendrecv_perfbuf_cmd(g, offset, size);
+}
+
+static int vgpu_perfbuffer_disable(struct gk20a *g)
+{
+	return vgpu_sendrecv_perfbuf_cmd(g, 0, 0);
+}
+
 void vgpu_init_dbg_session_ops(struct gpu_ops *gops)
 {
 	gops->dbg_session_ops.exec_reg_ops = vgpu_exec_regops;
@@ -188,4 +219,6 @@ void vgpu_init_dbg_session_ops(struct gpu_ops *gops)
 		vgpu_check_and_set_context_reservation;
 	gops->dbg_session_ops.release_profiler_reservation =
 		vgpu_release_profiler_reservation;
+	gops->dbg_session_ops.perfbuffer_enable = vgpu_perfbuffer_enable;
+	gops->dbg_session_ops.perfbuffer_disable = vgpu_perfbuffer_disable;
 }
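
Two details of the vgpu implementation above are worth calling out. Disable is simply the degenerate RPC with offset and size of zero, which the RM server is expected to treat as a teardown request (the server side is outside this patch). And the error handling layers transport status over payload status:

    err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
    /* a transport failure takes precedence; otherwise surface the
     * RM server's return code carried in the reply payload */
    err = err ? err : msg.ret;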


@@ -104,6 +104,7 @@ enum {
 	TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE = 70,
 	TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE = 71,
 	TEGRA_VGPU_CMD_PROF_MGT = 72,
+	TEGRA_VGPU_CMD_PERFBUF_MGT = 73,
 	TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER = 74,
 };
@@ -488,6 +489,12 @@ struct tegra_vgpu_prof_mgt_params {
 	u32 mode;
 };
 
+struct tegra_vgpu_perfbuf_mgt_params {
+	u64 vm_handle;
+	u64 offset;
+	u32 size;
+};
+
 #define TEGRA_VGPU_GPU_FREQ_TABLE_SIZE 25
 
 struct tegra_vgpu_get_gpu_freq_table_params {
@@ -545,6 +552,7 @@ struct tegra_vgpu_cmd_msg {
 		struct tegra_vgpu_suspend_resume_contexts resume_contexts;
 		struct tegra_vgpu_clear_sm_error_state clear_sm_error_state;
 		struct tegra_vgpu_prof_mgt_params prof_management;
+		struct tegra_vgpu_perfbuf_mgt_params perfbuf_management;
 		struct tegra_vgpu_get_timestamps_zipper_params get_timestamps_zipper;
 		struct tegra_vgpu_get_gpu_freq_table_params get_gpu_freq_table;
 		char padding[192];
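
Putting the protocol pieces together, a client fills the new union member as below; this restates vgpu_sendrecv_perfbuf_cmd() from the diff above with the fields annotated (error handling elided):

    struct tegra_vgpu_cmd_msg msg;
    struct tegra_vgpu_perfbuf_mgt_params *p = &msg.params.perfbuf_management;

    msg.cmd = TEGRA_VGPU_CMD_PERFBUF_MGT;
    msg.handle = vgpu_get_handle(g);
    p->vm_handle = vm->handle;  /* address space the buffer is mapped in */
    p->offset = offset;         /* GPU VA of the perf output buffer */
    p->size = size;             /* bytes; offset == 0 && size == 0 disables */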