mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-24 10:34:43 +03:00)
gpu: nvgpu: ModeE perfbuffer feature development
perfbuf mapping fixes:
- Allocate VM specifically for perfbuf use: using the PMU's VM results in
  mmu faults for larger buffers where 64k pages are used.
- Make 4GB boundary check work for large address spaces
- remove requirement to have allow_all flag set
- track perfbuf ownership and clean up appropriately

Bug 1880196
JIRA EVLR-1074

Change-Id: Ieee4eb17b64acf9b6ede37bf8e6a91892cda4a7e
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1460809
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
committed by mobile promotions
parent 6df49a63ca
commit 0d8f5f3fdb
@@ -514,6 +514,8 @@ static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
         return err;
 }
 
+static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset);
+
 int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 {
         struct dbg_session_gk20a *dbg_s = filp->private_data;
@@ -534,6 +536,10 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
                 NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE);
         nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
 
+        /* If this session owned the perf buffer, release it */
+        if (g->perfbuf.owner == dbg_s)
+                gk20a_perfbuf_release_locked(g, g->perfbuf.offset);
+
         /* Per-context profiler objects were released when we called
          * dbg_unbind_all_channels. We could still have global ones.
          */
@@ -1821,16 +1827,39 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
                 struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
         struct gk20a *g = dbg_s->g;
+        struct mm_gk20a *mm = &g->mm;
+        struct vm_gk20a *vm = &mm->perfbuf.vm;
         int err;
         u32 virt_size;
         u32 virt_addr_lo;
         u32 virt_addr_hi;
         u32 inst_pa_page;
+        u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
-        if (!g->allow_all)
-                return -EACCES;
-
-        err = gk20a_vm_map_buffer(&g->mm.pmu.vm,
+        nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+        if (g->perfbuf.owner) {
+                nvgpu_mutex_release(&g->dbg_sessions_lock);
+                return -EBUSY;
+        }
+
+        err = gk20a_init_vm(mm, vm, big_page_size,
+                        big_page_size << 10,
+                        NV_MM_DEFAULT_KERNEL_SIZE,
+                        NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
+                        false, false, "perfbuf");
+        if (err) {
+                nvgpu_mutex_release(&g->dbg_sessions_lock);
+                return err;
+        }
+
+        err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
+        if (err)
+                goto err_remove_vm;
+
+        g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0);
+
+        err = gk20a_vm_map_buffer(vm,
                         args->dmabuf_fd,
                         &args->offset,
                         0,
@@ -1839,23 +1868,21 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
                         args->mapping_size,
                         NULL);
         if (err)
-                return err;
+                goto err_remove_vm;
 
-        /* perf output buffer may not cross a 4GB boundary - with a separate va
-         * smaller than that, it won't */
+        /* perf output buffer may not cross a 4GB boundary */
         virt_size = u64_lo32(args->mapping_size);
         virt_addr_lo = u64_lo32(args->offset);
         virt_addr_hi = u64_hi32(args->offset);
-        /* but check anyway */
-        if (args->offset + virt_size > SZ_4G) {
+        if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) {
                 err = -EINVAL;
-                goto fail_unmap;
+                goto err_unmap;
         }
 
         err = gk20a_busy(g);
         if (err) {
                 nvgpu_err(g, "failed to poweron");
-                goto fail_unmap;
+                goto err_unmap;
         }
 
         /* address and size are aligned to 32 bytes, the lowest bits read back
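This hunk is where the 4GB-boundary fix lands. The old "args->offset + virt_size > SZ_4G" test is only meaningful while the whole VA space sits below 4GB (true for the small PMU VM, not for the new dedicated one); the new test compares the upper 32 bits of the buffer's first and last addresses, so it detects an actual boundary crossing at any position in a large address space. A standalone sketch of the difference; u64_hi32() is redefined locally for illustration, in the driver it comes from the kernel headers:

#include <stdint.h>
#include <stdio.h>

#define SZ_4G (1ULL << 32)

static uint32_t u64_hi32(uint64_t v) { return (uint32_t)(v >> 32); }

/* old check: rejects any buffer ending above 4GB, even one that sits
 * entirely inside a single 4GB page of a larger VA space */
static int crosses_old(uint64_t offset, uint64_t size)
{
        return offset + size > SZ_4G;
}

/* new check: rejects only buffers whose start and end differ in the
 * upper 32 bits, i.e. ones that actually cross a 4GB boundary */
static int crosses_new(uint64_t offset, uint64_t size)
{
        return u64_hi32(offset) != u64_hi32(offset + size);
}

int main(void)
{
        /* inside the second 4GB page: old check rejects, new accepts */
        printf("%d %d\n", crosses_old(SZ_4G + 0x1000, 0x10000),
                        crosses_new(SZ_4G + 0x1000, 0x10000));
        /* straddling the first 4GB boundary: both reject */
        printf("%d %d\n", crosses_old(SZ_4G - 0x1000, 0x10000),
                        crosses_new(SZ_4G - 0x1000, 0x10000));
        return 0;
}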
@@ -1866,7 +1893,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
         gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
 
         /* this field is aligned to 4K */
-        inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
+        inst_pa_page = gk20a_mm_inst_block_addr(g,
+                                        &mm->perfbuf.inst_block) >> 12;
 
         /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
          * should be written last */
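The comments in this hunk encode the PMA programming protocol: the output base/size registers take 32-byte-aligned values, the instance block pointer is passed as a 4K page number (hence the >> 12), and the MEM_BLOCK write both supplies that page number and triggers the bind, so it has to come last. A schematic of the sequence with stand-in register writes; only perf_pmasys_outsize_r() is quoted from the diff, the other register names are assumptions based on nvgpu's naming scheme:

#include <stdint.h>
#include <stdio.h>

/* stand-in for gk20a_writel(); just logs the access order */
static void writel_sketch(const char *reg, uint32_t val)
{
        printf("%-12s <- 0x%08x\n", reg, val);
}

static void pma_bind_sketch(uint64_t buf_va, uint32_t buf_size,
                uint64_t inst_block_pa)
{
        /* address and size are aligned to 32 bytes; the hardware
         * ignores the low bits */
        writel_sketch("outbase", (uint32_t)buf_va);
        writel_sketch("outbaseupper", (uint32_t)(buf_va >> 32));
        writel_sketch("outsize", buf_size);

        /* 4K-aligned physical address passed as a page number, and
         * written last because this write triggers the bind */
        writel_sketch("mem_block", (uint32_t)(inst_block_pa >> 12));
}

int main(void)
{
        pma_bind_sketch(0x100002000ULL, 0x100000, 0x84000000ULL);
        return 0;
}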
@@ -1877,23 +1905,24 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 
         gk20a_idle(g);
 
+        g->perfbuf.owner = dbg_s;
+        g->perfbuf.offset = args->offset;
+        nvgpu_mutex_release(&g->dbg_sessions_lock);
+
         return 0;
 
-fail_unmap:
-        gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
+err_unmap:
+        gk20a_vm_unmap_buffer(vm, args->offset, NULL);
+err_remove_vm:
+        gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
+        nvgpu_mutex_release(&g->dbg_sessions_lock);
         return err;
 }
 
-static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
-                struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
+/* must be called with dbg_sessions_lock held */
+static int gk20a_perfbuf_disable_locked(struct gk20a *g)
 {
-        struct gk20a *g = dbg_s->g;
-        int err;
-
-        if (!g->allow_all)
-                return -EACCES;
-
-        err = gk20a_busy(g);
+        int err = gk20a_busy(g);
         if (err) {
                 nvgpu_err(g, "failed to poweron");
                 return err;
@@ -1911,11 +1940,45 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
 
         gk20a_idle(g);
 
-        gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
-
         return 0;
 }
 
+static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
+{
+        struct mm_gk20a *mm = &g->mm;
+        struct vm_gk20a *vm = &mm->perfbuf.vm;
+        int err;
+
+        err = gk20a_perfbuf_disable_locked(g);
+
+        gk20a_vm_unmap_buffer(vm, offset, NULL);
+        gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
+
+        g->perfbuf.owner = NULL;
+        g->perfbuf.offset = 0;
+        return err;
+}
+
+static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
+                struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
+{
+        struct gk20a *g = dbg_s->g;
+        int err;
+
+        nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+        if ((g->perfbuf.owner != dbg_s) ||
+                        (g->perfbuf.offset != args->offset)) {
+                nvgpu_mutex_release(&g->dbg_sessions_lock);
+                return -EINVAL;
+        }
+
+        err = gk20a_perfbuf_release_locked(g, args->offset);
+
+        nvgpu_mutex_release(&g->dbg_sessions_lock);
+
+        return err;
+}
+
 void gk20a_init_dbg_session_ops(struct gpu_ops *gops)
 {
         gops->dbg_session_ops.exec_reg_ops = exec_regops_gk20a;
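Taken together, the hunks above give the perf buffer single-owner reservation semantics: gk20a_perfbuf_map() takes ownership under dbg_sessions_lock or fails with -EBUSY, gk20a_perfbuf_unmap() insists on the owning session and a matching offset, and gk20a_dbg_gpu_dev_release() (second hunk) cleans up if the owner closes with the buffer still mapped. The same pattern in a standalone, userspace-compilable sketch, with a pthread mutex standing in for nvgpu_mutex and bare structs standing in for the session and gk20a objects:

#include <errno.h>
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct session { int id; };

static struct {
        pthread_mutex_t lock;           /* stands in for dbg_sessions_lock */
        struct session *owner;          /* mirrors g->perfbuf.owner */
        uint64_t offset;                /* mirrors g->perfbuf.offset */
} perfbuf = { PTHREAD_MUTEX_INITIALIZER, NULL, 0 };

/* map: first session in wins; later sessions see -EBUSY */
static int perfbuf_map(struct session *s, uint64_t offset)
{
        pthread_mutex_lock(&perfbuf.lock);
        if (perfbuf.owner) {
                pthread_mutex_unlock(&perfbuf.lock);
                return -EBUSY;
        }
        perfbuf.owner = s;
        perfbuf.offset = offset;
        pthread_mutex_unlock(&perfbuf.lock);
        return 0;
}

/* unmap: only the owner, with a matching offset, may release */
static int perfbuf_unmap(struct session *s, uint64_t offset)
{
        pthread_mutex_lock(&perfbuf.lock);
        if (perfbuf.owner != s || perfbuf.offset != offset) {
                pthread_mutex_unlock(&perfbuf.lock);
                return -EINVAL;
        }
        perfbuf.owner = NULL;
        perfbuf.offset = 0;
        pthread_mutex_unlock(&perfbuf.lock);
        return 0;
}

int main(void)
{
        struct session a = {1}, b = {2};

        printf("a maps:   %d\n", perfbuf_map(&a, 0x10000));   /* 0 */
        printf("b maps:   %d\n", perfbuf_map(&b, 0x20000));   /* -EBUSY */
        printf("b unmaps: %d\n", perfbuf_unmap(&b, 0x10000)); /* -EINVAL */
        printf("a unmaps: %d\n", perfbuf_unmap(&a, 0x10000)); /* 0 */
        return 0;
}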
@@ -1027,6 +1027,12 @@ struct gk20a {
         struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
         u32 dbg_regops_tmp_buf_ops;
 
+        /* For perfbuf mapping */
+        struct {
+                struct dbg_session_gk20a *owner;
+                u64 offset;
+        } perfbuf;
+
         /* For profiler reservations */
         struct nvgpu_list_node profiler_objects;
         bool global_profiler_reservation_held;
@@ -361,6 +361,11 @@ struct mm_gk20a {
                 struct nvgpu_mem inst_block;
         } hwpm;
 
+        struct {
+                struct vm_gk20a vm;
+                struct nvgpu_mem inst_block;
+        } perfbuf;
+
         struct {
                 struct vm_gk20a vm;
         } cde;