mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
gpu: nvgpu: add hw perfmon buffer mapping ioctls
Map/unmap buffers for HWPM and deal with its instance block, the minimum work required to run the HWPM via regops from userspace. Bug 1517458 Bug 1573150 Change-Id: If14086a88b54bf434843d7c2fee8a9113023a3b0 Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: http://git-master/r/673689 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
Dan Willemsen
parent
f93a8cc36b
commit
3877adcd65
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Tegra GK20A GPU Debugger/Profiler Driver
|
||||
*
|
||||
* Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2013-2015, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "regops_gk20a.h"
|
||||
#include "hw_therm_gk20a.h"
|
||||
#include "hw_gr_gk20a.h"
|
||||
#include "hw_perf_gk20a.h"
|
||||
|
||||
struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
|
||||
.exec_reg_ops = exec_regops_gk20a,
|
||||
@@ -370,6 +371,11 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
|
||||
struct dbg_session_gk20a *dbg_s,
|
||||
struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
|
||||
|
||||
static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
|
||||
struct nvgpu_dbg_gpu_perfbuf_map_args *args);
|
||||
|
||||
static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
|
||||
struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
|
||||
|
||||
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
@@ -436,6 +442,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
|
||||
(struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf);
|
||||
break;
|
||||
|
||||
case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP:
|
||||
err = gk20a_perfbuf_map(dbg_s,
|
||||
(struct nvgpu_dbg_gpu_perfbuf_map_args *)buf);
|
||||
break;
|
||||
|
||||
case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP:
|
||||
err = gk20a_perfbuf_unmap(dbg_s,
|
||||
(struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
|
||||
break;
|
||||
|
||||
default:
|
||||
gk20a_err(dev_from_gk20a(g),
|
||||
"unrecognized dbg gpu ioctl cmd: 0x%x",
|
||||
@@ -775,3 +791,80 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
|
||||
mutex_unlock(&g->dbg_sessions_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
|
||||
struct nvgpu_dbg_gpu_perfbuf_map_args *args)
|
||||
{
|
||||
struct gk20a *g = dbg_s->g;
|
||||
int err;
|
||||
u32 virt_size;
|
||||
u32 virt_addr_lo;
|
||||
u32 virt_addr_hi;
|
||||
u32 inst_pa_page;
|
||||
|
||||
if (!g->allow_all)
|
||||
return -EACCES;
|
||||
|
||||
err = gk20a_vm_map_buffer(&g->mm.pmu.vm,
|
||||
args->dmabuf_fd,
|
||||
&args->offset,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
args->mapping_size);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* perf output buffer may not cross a 4GB boundary - with a separate va
|
||||
* smaller than that, it won't */
|
||||
virt_size = u64_lo32(args->mapping_size);
|
||||
virt_addr_lo = u64_lo32(args->offset);
|
||||
virt_addr_hi = u64_hi32(args->offset);
|
||||
/* but check anyway */
|
||||
if (args->offset + virt_size > SZ_4G) {
|
||||
gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* address and size are aligned to 32 bytes, the lowest bits read back
|
||||
* as zeros */
|
||||
gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
|
||||
gk20a_writel(g, perf_pmasys_outbaseupper_r(),
|
||||
perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
|
||||
gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
|
||||
|
||||
/* this field is aligned to 4K */
|
||||
inst_pa_page = g->mm.hwpm.inst_block.cpu_pa >> 12;
|
||||
|
||||
/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
|
||||
* should be written last */
|
||||
gk20a_writel(g, perf_pmasys_mem_block_r(),
|
||||
perf_pmasys_mem_block_base_f(inst_pa_page) |
|
||||
perf_pmasys_mem_block_valid_true_f() |
|
||||
perf_pmasys_mem_block_target_lfb_f());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
|
||||
struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
|
||||
{
|
||||
struct gk20a *g = dbg_s->g;
|
||||
|
||||
if (!g->allow_all)
|
||||
return -EACCES;
|
||||
|
||||
gk20a_writel(g, perf_pmasys_outbase_r(), 0);
|
||||
gk20a_writel(g, perf_pmasys_outbaseupper_r(),
|
||||
perf_pmasys_outbaseupper_ptr_f(0));
|
||||
gk20a_writel(g, perf_pmasys_outsize_r(), 0);
|
||||
|
||||
gk20a_writel(g, perf_pmasys_mem_block_r(),
|
||||
perf_pmasys_mem_block_base_f(0) |
|
||||
perf_pmasys_mem_block_valid_false_f() |
|
||||
perf_pmasys_mem_block_target_f(0));
|
||||
|
||||
gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
117
drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h
Normal file
117
drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/*
|
||||
* Function naming determines intended use:
|
||||
*
|
||||
* <x>_r(void) : Returns the offset for register <x>.
|
||||
*
|
||||
* <x>_o(void) : Returns the offset for element <x>.
|
||||
*
|
||||
* <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
|
||||
*
|
||||
* <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
|
||||
*
|
||||
* <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
|
||||
* and masked to place it at field <y> of register <x>. This value
|
||||
* can be |'d with others to produce a full register value for
|
||||
* register <x>.
|
||||
*
|
||||
* <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
|
||||
* value can be ~'d and then &'d to clear the value of field <y> for
|
||||
* register <x>.
|
||||
*
|
||||
* <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
|
||||
* to place it at field <y> of register <x>. This value can be |'d
|
||||
* with others to produce a full register value for <x>.
|
||||
*
|
||||
* <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
|
||||
* <x> value 'r' after being shifted to place its LSB at bit 0.
|
||||
* This value is suitable for direct comparison with other unshifted
|
||||
* values appropriate for use in field <y> of register <x>.
|
||||
*
|
||||
* <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
|
||||
* field <y> of register <x>. This value is suitable for direct
|
||||
* comparison with unshifted values appropriate for use in field <y>
|
||||
* of register <x>.
|
||||
*/
|
||||
#ifndef _hw_perf_gk20a_h_
|
||||
#define _hw_perf_gk20a_h_
|
||||
|
||||
static inline u32 perf_pmasys_mem_block_r(void)
|
||||
{
|
||||
return 0x001b4070;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_base_f(u32 v)
|
||||
{
|
||||
return (v & 0xfffffff) << 0;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_target_f(u32 v)
|
||||
{
|
||||
return (v & 0x3) << 28;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_target_v(u32 r)
|
||||
{
|
||||
return (r >> 28) & 0x3;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_target_lfb_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_target_lfb_f(void)
|
||||
{
|
||||
return 0x0;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_valid_f(u32 v)
|
||||
{
|
||||
return (v & 0x1) << 31;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_valid_v(u32 r)
|
||||
{
|
||||
return (r >> 31) & 0x1;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_valid_true_v(void)
|
||||
{
|
||||
return 0x00000001;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_valid_true_f(void)
|
||||
{
|
||||
return 0x80000000;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_valid_false_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 perf_pmasys_mem_block_valid_false_f(void)
|
||||
{
|
||||
return 0x0;
|
||||
}
|
||||
static inline u32 perf_pmasys_outbase_r(void)
|
||||
{
|
||||
return 0x001b4074;
|
||||
}
|
||||
static inline u32 perf_pmasys_outbaseupper_r(void)
|
||||
{
|
||||
return 0x001b4078;
|
||||
}
|
||||
static inline u32 perf_pmasys_outbaseupper_ptr_f(u32 v)
|
||||
{
|
||||
return (v & 0xff) << 0;
|
||||
}
|
||||
static inline u32 perf_pmasys_outsize_r(void)
|
||||
{
|
||||
return 0x001b407c;
|
||||
}
|
||||
#endif
|
||||
@@ -101,6 +101,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
|
||||
int rw_flag);
|
||||
static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
|
||||
static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
|
||||
static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
|
||||
|
||||
|
||||
struct gk20a_dmabuf_priv {
|
||||
@@ -280,6 +281,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
|
||||
{
|
||||
gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
|
||||
gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
|
||||
gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
|
||||
}
|
||||
|
||||
int gk20a_init_mm_setup_sw(struct gk20a *g)
|
||||
@@ -315,6 +317,10 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = gk20a_init_hwpm(mm);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
|
||||
g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
|
||||
mm->remove_support = gk20a_remove_mm_support;
|
||||
@@ -2720,6 +2726,21 @@ clean_up_va:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gk20a_init_hwpm(struct mm_gk20a *mm)
|
||||
{
|
||||
int err;
|
||||
struct vm_gk20a *vm = &mm->pmu.vm;
|
||||
struct gk20a *g = gk20a_from_mm(mm);
|
||||
struct inst_desc *inst_block = &mm->hwpm.inst_block;
|
||||
|
||||
err = gk20a_alloc_inst_block(g, inst_block);
|
||||
if (err)
|
||||
return err;
|
||||
gk20a_init_inst_block(inst_block, vm, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm,
|
||||
u32 big_page_size)
|
||||
{
|
||||
|
||||
@@ -342,6 +342,12 @@ struct mm_gk20a {
|
||||
struct inst_desc inst_block;
|
||||
} pmu;
|
||||
|
||||
struct {
|
||||
/* using pmu vm currently */
|
||||
struct inst_desc inst_block;
|
||||
} hwpm;
|
||||
|
||||
|
||||
struct mutex l2_op_lock;
|
||||
|
||||
void (*remove_support)(struct mm_gk20a *mm);
|
||||
|
||||
@@ -432,10 +432,26 @@ struct nvgpu_dbg_gpu_suspend_resume_all_sms_args {
|
||||
#define NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS \
|
||||
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 6, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args)
|
||||
|
||||
struct nvgpu_dbg_gpu_perfbuf_map_args {
|
||||
__u32 dmabuf_fd; /* in */
|
||||
__u32 reserved;
|
||||
__u64 mapping_size; /* in, size of mapped buffer region */
|
||||
__u64 offset; /* out, virtual address of the mapping */
|
||||
};
|
||||
|
||||
struct nvgpu_dbg_gpu_perfbuf_unmap_args {
|
||||
__u64 offset;
|
||||
};
|
||||
|
||||
#define NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP \
|
||||
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 7, struct nvgpu_dbg_gpu_perfbuf_map_args)
|
||||
#define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \
|
||||
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args)
|
||||
|
||||
#define NVGPU_DBG_GPU_IOCTL_LAST \
|
||||
_IOC_NR(NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS)
|
||||
_IOC_NR(NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP)
|
||||
#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
|
||||
sizeof(struct nvgpu_dbg_gpu_exec_reg_ops_args)
|
||||
sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)
|
||||
|
||||
/*
|
||||
* /dev/nvhost-gpu device
|
||||
|
||||
Reference in New Issue
Block a user