diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index daed29670..5bee34fc4 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1,7 +1,7 @@
/*
* Tegra GK20A GPU Debugger/Profiler Driver
*
- * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2013-2015, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -29,6 +29,7 @@
#include "regops_gk20a.h"
#include "hw_therm_gk20a.h"
#include "hw_gr_gk20a.h"
+#include "hw_perf_gk20a.h"
struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
.exec_reg_ops = exec_regops_gk20a,
@@ -370,6 +371,11 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
+static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
+ struct nvgpu_dbg_gpu_perfbuf_map_args *args);
+
+static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
+ struct nvgpu_dbg_gpu_perfbuf_unmap_args *args);
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
@@ -436,6 +442,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
(struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf);
break;
+ case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP:
+ err = gk20a_perfbuf_map(dbg_s,
+ (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf);
+ break;
+
+ case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP:
+ err = gk20a_perfbuf_unmap(dbg_s,
+ (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf);
+ break;
+
default:
gk20a_err(dev_from_gk20a(g),
"unrecognized dbg gpu ioctl cmd: 0x%x",
@@ -775,3 +791,80 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
mutex_unlock(&g->dbg_sessions_lock);
return err;
}
+
+/*
+ * Map args->dmabuf_fd into the PMU VM and point the PMA output registers
+ * at it; args->offset receives the GPU VA. Returns 0, -EACCES without
+ * g->allow_all, -EINVAL for an unusable size/window, or the mapping error.
+ */
+static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
+{
+	struct gk20a *g = dbg_s->g;
+	int err;
+	u32 virt_size, virt_addr_lo, virt_addr_hi, inst_pa_page;
+
+	if (!g->allow_all)
+		return -EACCES;
+
+	/* virt_size is 32 bits wide: refuse sizes u64_lo32() would truncate */
+	if (u64_hi32(args->mapping_size))
+		return -EINVAL;
+
+	err = gk20a_vm_map_buffer(&g->mm.pmu.vm, args->dmabuf_fd,
+			&args->offset, 0, 0, 0, args->mapping_size);
+	if (err)
+		return err;
+
+	/* perf output buffer may not cross a 4GB boundary - with a separate va
+	 * smaller than that, it won't, but check anyway */
+	virt_size = u64_lo32(args->mapping_size);
+	virt_addr_lo = u64_lo32(args->offset);
+	virt_addr_hi = u64_hi32(args->offset);
+	if (args->offset + virt_size > SZ_4G) {
+		gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
+		return -EINVAL;
+	}
+
+	/* address and size are aligned to 32 bytes, the lowest bits read back
+	 * as zeros */
+	gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
+	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
+			perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
+	gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
+
+	/* this field is aligned to 4K */
+	inst_pa_page = g->mm.hwpm.inst_block.cpu_pa >> 12;
+
+	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
+	 * should be written last */
+	gk20a_writel(g, perf_pmasys_mem_block_r(),
+			perf_pmasys_mem_block_base_f(inst_pa_page) |
+			perf_pmasys_mem_block_valid_true_f() |
+			perf_pmasys_mem_block_target_lfb_f());
+
+	return 0;
+}
+
+/* Zero the PMA output window, mark MEM_BLOCK invalid and drop the PMU VM
+ * mapping at args->offset; -EACCES unless g->allow_all is set. */
+static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_perfbuf_unmap_args *args)
+{
+	struct gk20a *gpu = dbg_s->g;
+	const u32 unbound = perf_pmasys_mem_block_base_f(0) |
+			perf_pmasys_mem_block_valid_false_f() |
+			perf_pmasys_mem_block_target_f(0);
+
+	if (!gpu->allow_all)
+		return -EACCES;
+
+	/* zero the output window first, then clear MEM_BLOCK's valid bit */
+	gk20a_writel(gpu, perf_pmasys_outbase_r(), 0);
+	gk20a_writel(gpu, perf_pmasys_outbaseupper_r(),
+			perf_pmasys_outbaseupper_ptr_f(0));
+	gk20a_writel(gpu, perf_pmasys_outsize_r(), 0);
+	gk20a_writel(gpu, perf_pmasys_mem_block_r(), unbound);
+	gk20a_vm_unmap_buffer(&gpu->mm.pmu.vm, args->offset);
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h
new file mode 100644
index 000000000..65d91de6c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_perf_gk20a.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>. This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>. This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>. This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_perf_gk20a_h_
+#define _hw_perf_gk20a_h_
+
+static inline u32 perf_pmasys_mem_block_r(void)
+{
+ return 0x001b4070; /* offset of the perf_pmasys_mem_block register */
+}
+static inline u32 perf_pmasys_mem_block_base_f(u32 v)
+{
+ return (v & 0xfffffff) << 0; /* base field: bits 27:0 */
+}
+static inline u32 perf_pmasys_mem_block_target_f(u32 v)
+{
+ return (v & 0x3) << 28; /* target field: bits 29:28 */
+}
+static inline u32 perf_pmasys_mem_block_target_v(u32 r)
+{
+ return (r >> 28) & 0x3; /* target field of 'r', moved down to bit 0 */
+}
+static inline u32 perf_pmasys_mem_block_target_lfb_v(void)
+{
+ return 0x00000000; /* unshifted "lfb" value of the target field */
+}
+static inline u32 perf_pmasys_mem_block_target_lfb_f(void)
+{
+ return 0x0; /* "lfb" already shifted into the target field */
+}
+static inline u32 perf_pmasys_mem_block_valid_f(u32 v)
+{
+ return (v & 0x1) << 31; /* valid field: bit 31 */
+}
+static inline u32 perf_pmasys_mem_block_valid_v(u32 r)
+{
+ return (r >> 31) & 0x1; /* valid field of 'r', moved down to bit 0 */
+}
+static inline u32 perf_pmasys_mem_block_valid_true_v(void)
+{
+ return 0x00000001; /* unshifted "true" value of the valid field */
+}
+static inline u32 perf_pmasys_mem_block_valid_true_f(void)
+{
+ return 0x80000000; /* "true" already shifted into bit 31 */
+}
+static inline u32 perf_pmasys_mem_block_valid_false_v(void)
+{
+ return 0x00000000; /* unshifted "false" value of the valid field */
+}
+static inline u32 perf_pmasys_mem_block_valid_false_f(void)
+{
+ return 0x0; /* "false" already shifted into bit 31 */
+}
+static inline u32 perf_pmasys_outbase_r(void)
+{
+ return 0x001b4074; /* offset of the perf_pmasys_outbase register */
+}
+static inline u32 perf_pmasys_outbaseupper_r(void)
+{
+ return 0x001b4078; /* offset of the perf_pmasys_outbaseupper register */
+}
+static inline u32 perf_pmasys_outbaseupper_ptr_f(u32 v)
+{
+ return (v & 0xff) << 0; /* ptr field: bits 7:0 */
+}
+static inline u32 perf_pmasys_outsize_r(void)
+{
+ return 0x001b407c; /* offset of the perf_pmasys_outsize register */
+}
+#endif /* _hw_perf_gk20a_h_ */
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 8d9488fd5..80c766b6f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -101,6 +101,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
int rw_flag);
static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
+static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
struct gk20a_dmabuf_priv {
@@ -280,6 +281,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
{
gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
+ gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
}
int gk20a_init_mm_setup_sw(struct gk20a *g)
@@ -315,6 +317,10 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
if (err)
return err;
+ err = gk20a_init_hwpm(mm);
+ if (err)
+ return err;
+
/* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
mm->remove_support = gk20a_remove_mm_support;
@@ -2720,6 +2726,21 @@ clean_up_va:
return err;
}
+/* Allocate the HWPM instance block and initialise it against the PMU VM. */
+static int gk20a_init_hwpm(struct mm_gk20a *mm)
+{
+	struct inst_desc *hwpm_inst = &mm->hwpm.inst_block;
+	int ret = gk20a_alloc_inst_block(gk20a_from_mm(mm), hwpm_inst);
+
+	if (ret)
+		return ret;
+
+	/* shares the PMU VM; big_page_size is passed as 0 */
+	gk20a_init_inst_block(hwpm_inst, &mm->pmu.vm, 0);
+
+	return 0;
+}
+
void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm,
u32 big_page_size)
{
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 40e9488d6..7b3554368 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -342,6 +342,12 @@ struct mm_gk20a {
struct inst_desc inst_block;
} pmu;
+ struct {
+ /* using pmu vm currently */
+ struct inst_desc inst_block;
+ } hwpm;
+
+
struct mutex l2_op_lock;
void (*remove_support)(struct mm_gk20a *mm);
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 1e4387754..ebeacf9bc 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -432,10 +432,26 @@ struct nvgpu_dbg_gpu_suspend_resume_all_sms_args {
#define NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS \
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 6, struct nvgpu_dbg_gpu_suspend_resume_all_sms_args)
+struct nvgpu_dbg_gpu_perfbuf_map_args {
+ __u32 dmabuf_fd; /* in, fd of the dma-buf to map */
+ __u32 reserved; /* not read by gk20a_perfbuf_map() */
+ __u64 mapping_size; /* in, size of mapped buffer region */
+ __u64 offset; /* out, virtual address of the mapping */
+};
+
+struct nvgpu_dbg_gpu_perfbuf_unmap_args {
+ __u64 offset; /* in, virtual address to unmap from the PMU VM */
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP \
+ _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 7, struct nvgpu_dbg_gpu_perfbuf_map_args)
+#define NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP \
+ _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 8, struct nvgpu_dbg_gpu_perfbuf_unmap_args)
+
#define NVGPU_DBG_GPU_IOCTL_LAST \
- _IOC_NR(NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS)
+ _IOC_NR(NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP)
#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
- sizeof(struct nvgpu_dbg_gpu_exec_reg_ops_args)
+ sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)
/*
* /dev/nvhost-gpu device