gpu: nvgpu: split perfbuf initialization

gk20a_perfbuf_map() allocates the perfbuf VM, maps the user buffer into
the new VM, and then triggers gops.perfbuf.perfbuf_enable(). This HAL
then does the following:
- Allocate the perfbuf instance block
- Initialize the perfbuf instance block
- Reset the stream buffer
- Program the instance block address into the PMA registers
- Program the user buffer address into the PMA registers

The new profiler interface will have its own API to set up the PMA
stream, and it requires the above setup to be done in two phases:
perfbuf initialization first, then user buffer setup.

Split the above functionality across the functions below:
- nvgpu_perfbuf_init_vm()
  - Allocate the perfbuf VM
  - Call gops.perfbuf.init_inst_block() to initialize the perfbuf
    instance block

- gops.perfbuf.init_inst_block()
  - Allocate the perfbuf instance block
  - Initialize the perfbuf instance block
  - Program the instance block address into the PMA registers using
    gops.perf.init_inst_block()
  - For vGPU, trigger the TEGRA_VGPU_CMD_PERFBUF_INST_BLOCK_MGT
    command to the GPU server

- gops.perfbuf.perfbuf_enable()
  - Reset the stream buffer
  - Program the user buffer address into the PMA registers

Also add the corresponding cleanup functions (a sketch of the resulting
call order follows this list):
- gops.perf.deinit_inst_block()
- gops.perfbuf.deinit_inst_block()
- nvgpu_perfbuf_deinit_vm()
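
A minimal sketch of the resulting setup/teardown order, condensed from
the gk20a_perfbuf_map()/gk20a_perfbuf_release_locked() changes below
(locking and error handling trimmed):

    /* Setup: VM and instance block first, then the user buffer. */
    err = nvgpu_perfbuf_init_vm(g);     /* VM + gops.perfbuf.init_inst_block() */
    err = nvgpu_vm_map_buffer(g->mm.perfbuf.vm, ...);
    err = g->ops.perfbuf.perfbuf_enable(g, offset, size);
                                        /* stream reset + user buffer in PMA */

    /* Teardown mirrors setup in reverse. */
    err = g->ops.perfbuf.perfbuf_disable(g);
    nvgpu_vm_unmap(g->mm.perfbuf.vm, offset, NULL);
    nvgpu_perfbuf_deinit_vm(g);         /* gops.perfbuf.deinit_inst_block(),
                                           then drops the VM */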

Bug 2510974
Jira NVGPU-5360

Change-Id: I486370f21012cbb7fea84fe46fb16db95bc16790
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2372984
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>

@@ -1,7 +1,7 @@
 /*
  * Cycle stats snapshots support
  *
- * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -141,8 +141,8 @@ int nvgpu_css_enable_snapshot(struct nvgpu_channel *ch,
     (void) memset(data->hw_snapshot, 0xff, snapshot_size);
 
     g->ops.perf.membuf_reset_streaming(g);
-    g->ops.perf.enable_membuf(g, snapshot_size, data->hw_memdesc.gpu_va,
-            &g->mm.hwpm.inst_block);
+    g->ops.perf.init_inst_block(g, &g->mm.hwpm.inst_block);
+    g->ops.perf.enable_membuf(g, snapshot_size, data->hw_memdesc.gpu_va);
 
     nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots enabled\n");
@@ -168,6 +168,7 @@ void nvgpu_css_disable_snapshot(struct gk20a *g)
     g->ops.perf.membuf_reset_streaming(g);
     g->ops.perf.disable_membuf(g);
+    g->ops.perf.deinit_inst_block(g);
 
     nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
     (void) memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -28,7 +28,6 @@
 int nvgpu_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
 {
-    struct mm_gk20a *mm = &g->mm;
     int err;
 
     err = gk20a_busy(g);
@@ -37,15 +36,8 @@ int nvgpu_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
         return err;
     }
 
-    err = nvgpu_alloc_inst_block(g, &mm->perfbuf.inst_block);
-    if (err != 0) {
-        return err;
-    }
-    g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
-
     g->ops.perf.membuf_reset_streaming(g);
-    g->ops.perf.enable_membuf(g, size, offset, &mm->perfbuf.inst_block);
+    g->ops.perf.enable_membuf(g, size, offset);
 
     gk20a_idle(g);
@@ -67,3 +59,55 @@ int nvgpu_perfbuf_disable_locked(struct gk20a *g)
 
     return 0;
 }
+
+int nvgpu_perfbuf_init_inst_block(struct gk20a *g)
+{
+    struct mm_gk20a *mm = &g->mm;
+    int err;
+
+    err = nvgpu_alloc_inst_block(g, &mm->perfbuf.inst_block);
+    if (err != 0) {
+        return err;
+    }
+
+    g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
+    g->ops.perf.init_inst_block(g, &mm->perfbuf.inst_block);
+
+    return 0;
+}
+
+int nvgpu_perfbuf_init_vm(struct gk20a *g)
+{
+    struct mm_gk20a *mm = &g->mm;
+    u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
+    int err;
+
+    mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size,
+            big_page_size << 10,
+            NV_MM_DEFAULT_KERNEL_SIZE,
+            NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
+            false, false, false, "perfbuf");
+    if (mm->perfbuf.vm == NULL) {
+        return -ENOMEM;
+    }
+
+    err = g->ops.perfbuf.init_inst_block(g);
+    if (err != 0) {
+        nvgpu_vm_put(mm->perfbuf.vm);
+        return err;
+    }
+
+    return 0;
+}
+
+void nvgpu_perfbuf_deinit_inst_block(struct gk20a *g)
+{
+    g->ops.perf.deinit_inst_block(g);
+    nvgpu_free_inst_block(g, &g->mm.perfbuf.inst_block);
+}
+
+void nvgpu_perfbuf_deinit_vm(struct gk20a *g)
+{
+    g->ops.perfbuf.deinit_inst_block(g);
+    nvgpu_vm_put(g->mm.perfbuf.vm);
+}


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -58,3 +58,35 @@ int vgpu_perfbuffer_disable(struct gk20a *g)
 {
     return vgpu_sendrecv_perfbuf_cmd(g, 0, 0);
 }
+
+static int vgpu_sendrecv_perfbuf_inst_block_cmd(struct gk20a *g, u32 mode)
+{
+    struct mm_gk20a *mm = &g->mm;
+    struct vm_gk20a *vm = mm->perfbuf.vm;
+    struct tegra_vgpu_cmd_msg msg;
+    struct tegra_vgpu_perfbuf_inst_block_mgt_params *p =
+            &msg.params.perfbuf_inst_block_management;
+    int err;
+
+    msg.cmd = TEGRA_VGPU_CMD_PERFBUF_INST_BLOCK_MGT;
+    msg.handle = vgpu_get_handle(g);
+
+    p->vm_handle = vm->handle;
+    p->mode = mode;
+
+    err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+    err = err ? err : msg.ret;
+
+    return err;
+}
+
+int vgpu_perfbuffer_init_inst_block(struct gk20a *g)
+{
+    return vgpu_sendrecv_perfbuf_inst_block_cmd(g,
+            TEGRA_VGPU_PROF_PERFBUF_INST_BLOCK_INIT);
+}
+
+void vgpu_perfbuffer_deinit_inst_block(struct gk20a *g)
+{
+    vgpu_sendrecv_perfbuf_inst_block_cmd(g,
+            TEGRA_VGPU_PROF_PERFBUF_INST_BLOCK_DEINIT);
+}


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -24,3 +24,6 @@ struct gk20a;
 int vgpu_perfbuffer_enable(struct gk20a *g, u64 offset, u32 size);
 int vgpu_perfbuffer_disable(struct gk20a *g);
+
+int vgpu_perfbuffer_init_inst_block(struct gk20a *g);
+void vgpu_perfbuffer_deinit_inst_block(struct gk20a *g);


@@ -998,6 +998,8 @@ static const struct gpu_ops gm20b_ops = {
     .perf = {
         .enable_membuf = gm20b_perf_enable_membuf,
         .disable_membuf = gm20b_perf_disable_membuf,
+        .init_inst_block = gm20b_perf_init_inst_block,
+        .deinit_inst_block = gm20b_perf_deinit_inst_block,
         .membuf_reset_streaming = gm20b_perf_membuf_reset_streaming,
         .get_membuf_pending_bytes = gm20b_perf_get_membuf_pending_bytes,
         .set_membuf_handled_bytes = gm20b_perf_set_membuf_handled_bytes,
@@ -1009,6 +1011,8 @@ static const struct gpu_ops gm20b_ops = {
     .perfbuf = {
         .perfbuf_enable = nvgpu_perfbuf_enable_locked,
         .perfbuf_disable = nvgpu_perfbuf_disable_locked,
+        .init_inst_block = nvgpu_perfbuf_init_inst_block,
+        .deinit_inst_block = nvgpu_perfbuf_deinit_inst_block,
     },
 #endif
 #ifdef CONFIG_NVGPU_PROFILER


@@ -1100,6 +1100,8 @@ static const struct gpu_ops gp10b_ops = {
     .perf = {
         .enable_membuf = gm20b_perf_enable_membuf,
         .disable_membuf = gm20b_perf_disable_membuf,
+        .init_inst_block = gm20b_perf_init_inst_block,
+        .deinit_inst_block = gm20b_perf_deinit_inst_block,
         .membuf_reset_streaming = gm20b_perf_membuf_reset_streaming,
         .get_membuf_pending_bytes = gm20b_perf_get_membuf_pending_bytes,
         .set_membuf_handled_bytes = gm20b_perf_set_membuf_handled_bytes,
@@ -1111,6 +1113,8 @@ static const struct gpu_ops gp10b_ops = {
     .perfbuf = {
         .perfbuf_enable = nvgpu_perfbuf_enable_locked,
         .perfbuf_disable = nvgpu_perfbuf_disable_locked,
+        .init_inst_block = nvgpu_perfbuf_init_inst_block,
+        .deinit_inst_block = nvgpu_perfbuf_deinit_inst_block,
     },
 #endif
 #ifdef CONFIG_NVGPU_PROFILER


@@ -1361,6 +1361,8 @@ NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7))
     .perf = {
         .enable_membuf = gv11b_perf_enable_membuf,
         .disable_membuf = gv11b_perf_disable_membuf,
+        .init_inst_block = gv11b_perf_init_inst_block,
+        .deinit_inst_block = gv11b_perf_deinit_inst_block,
         .membuf_reset_streaming = gv11b_perf_membuf_reset_streaming,
         .get_membuf_pending_bytes = gv11b_perf_get_membuf_pending_bytes,
         .set_membuf_handled_bytes = gv11b_perf_set_membuf_handled_bytes,
@@ -1372,6 +1374,8 @@ NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7))
     .perfbuf = {
         .perfbuf_enable = nvgpu_perfbuf_enable_locked,
         .perfbuf_disable = nvgpu_perfbuf_disable_locked,
+        .init_inst_block = nvgpu_perfbuf_init_inst_block,
+        .deinit_inst_block = nvgpu_perfbuf_deinit_inst_block,
     },
 #endif
 #ifdef CONFIG_NVGPU_PROFILER


@@ -1404,6 +1404,8 @@ static const struct gpu_ops tu104_ops = {
     .perf = {
         .enable_membuf = gv11b_perf_enable_membuf,
         .disable_membuf = gv11b_perf_disable_membuf,
+        .init_inst_block = gv11b_perf_init_inst_block,
+        .deinit_inst_block = gv11b_perf_deinit_inst_block,
         .membuf_reset_streaming = gv11b_perf_membuf_reset_streaming,
         .get_membuf_pending_bytes = gv11b_perf_get_membuf_pending_bytes,
         .set_membuf_handled_bytes = gv11b_perf_set_membuf_handled_bytes,
@@ -1415,6 +1417,8 @@ static const struct gpu_ops tu104_ops = {
     .perfbuf = {
         .perfbuf_enable = nvgpu_perfbuf_enable_locked,
         .perfbuf_disable = nvgpu_perfbuf_disable_locked,
+        .init_inst_block = nvgpu_perfbuf_init_inst_block,
+        .deinit_inst_block = nvgpu_perfbuf_deinit_inst_block,
     },
 #endif
 #ifdef CONFIG_NVGPU_PROFILER


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -70,12 +70,10 @@ void gm20b_perf_membuf_reset_streaming(struct gk20a *g)
     }
 }
 
-void gm20b_perf_enable_membuf(struct gk20a *g, u32 size,
-    u64 buf_addr, struct nvgpu_mem *inst_block)
+void gm20b_perf_enable_membuf(struct gk20a *g, u32 size, u64 buf_addr)
 {
     u32 addr_lo;
     u32 addr_hi;
-    u32 inst_block_ptr;
 
     addr_lo = u64_lo32(buf_addr);
     addr_hi = u64_hi32(buf_addr);
@@ -85,7 +83,19 @@ void gm20b_perf_enable_membuf(struct gk20a *g, u32 size,
         perf_pmasys_outbaseupper_ptr_f(addr_hi));
     nvgpu_writel(g, perf_pmasys_outsize_r(), size);
+}
 
-    inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block);
+void gm20b_perf_disable_membuf(struct gk20a *g)
+{
+    nvgpu_writel(g, perf_pmasys_outbase_r(), 0);
+    nvgpu_writel(g, perf_pmasys_outbaseupper_r(),
+        perf_pmasys_outbaseupper_ptr_f(0));
+    nvgpu_writel(g, perf_pmasys_outsize_r(), 0);
+}
+
+void gm20b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
+{
+    u32 inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block);
+
     nvgpu_writel(g, perf_pmasys_mem_block_r(),
         perf_pmasys_mem_block_base_f(inst_block_ptr) |
@@ -96,13 +106,8 @@ void gm20b_perf_enable_membuf(struct gk20a *g, u32 size,
         perf_pmasys_mem_block_target_lfb_f()));
 }
 
-void gm20b_perf_disable_membuf(struct gk20a *g)
+void gm20b_perf_deinit_inst_block(struct gk20a *g)
 {
-    nvgpu_writel(g, perf_pmasys_outbase_r(), 0);
-    nvgpu_writel(g, perf_pmasys_outbaseupper_r(),
-        perf_pmasys_outbaseupper_ptr_f(0));
-    nvgpu_writel(g, perf_pmasys_outsize_r(), 0);
-
     nvgpu_writel(g, perf_pmasys_mem_block_r(),
         perf_pmasys_mem_block_base_f(0) |
         perf_pmasys_mem_block_valid_false_f() |


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -37,10 +37,12 @@ void gm20b_perf_set_membuf_handled_bytes(struct gk20a *g,
 void gm20b_perf_membuf_reset_streaming(struct gk20a *g);
-void gm20b_perf_enable_membuf(struct gk20a *g, u32 size,
-    u64 buf_addr, struct nvgpu_mem *inst_block);
+void gm20b_perf_enable_membuf(struct gk20a *g, u32 size, u64 buf_addr);
 void gm20b_perf_disable_membuf(struct gk20a *g);
+void gm20b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
+void gm20b_perf_deinit_inst_block(struct gk20a *g);
 u32 gm20b_perf_get_pmm_per_chiplet_offset(void);
 
 #endif /* CONFIG_NVGPU_DEBUGGER */


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -70,12 +70,10 @@ void gv11b_perf_membuf_reset_streaming(struct gk20a *g)
     }
 }
 
-void gv11b_perf_enable_membuf(struct gk20a *g, u32 size,
-    u64 buf_addr, struct nvgpu_mem *inst_block)
+void gv11b_perf_enable_membuf(struct gk20a *g, u32 size, u64 buf_addr)
 {
     u32 addr_lo;
     u32 addr_hi;
-    u32 inst_block_ptr;
 
     addr_lo = u64_lo32(buf_addr);
     addr_hi = u64_hi32(buf_addr);
@@ -84,8 +82,19 @@ void gv11b_perf_enable_membuf(struct gk20a *g, u32 size,
     nvgpu_writel(g, perf_pmasys_outbaseupper_r(),
         perf_pmasys_outbaseupper_ptr_f(addr_hi));
     nvgpu_writel(g, perf_pmasys_outsize_r(), size);
+}
 
-    inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block);
+void gv11b_perf_disable_membuf(struct gk20a *g)
+{
+    nvgpu_writel(g, perf_pmasys_outbase_r(), 0);
+    nvgpu_writel(g, perf_pmasys_outbaseupper_r(),
+        perf_pmasys_outbaseupper_ptr_f(0));
+    nvgpu_writel(g, perf_pmasys_outsize_r(), 0);
+}
+
+void gv11b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
+{
+    u32 inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block);
+
     nvgpu_writel(g, perf_pmasys_mem_block_r(),
         perf_pmasys_mem_block_base_f(inst_block_ptr) |
@@ -96,13 +105,8 @@ void gv11b_perf_enable_membuf(struct gk20a *g, u32 size,
         perf_pmasys_mem_block_target_lfb_f()));
 }
 
-void gv11b_perf_disable_membuf(struct gk20a *g)
+void gv11b_perf_deinit_inst_block(struct gk20a *g)
 {
-    nvgpu_writel(g, perf_pmasys_outbase_r(), 0);
-    nvgpu_writel(g, perf_pmasys_outbaseupper_r(),
-        perf_pmasys_outbaseupper_ptr_f(0));
-    nvgpu_writel(g, perf_pmasys_outsize_r(), 0);
-
     nvgpu_writel(g, perf_pmasys_mem_block_r(),
         perf_pmasys_mem_block_base_f(0) |
         perf_pmasys_mem_block_valid_false_f() |


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -37,10 +37,12 @@ void gv11b_perf_set_membuf_handled_bytes(struct gk20a *g,
 void gv11b_perf_membuf_reset_streaming(struct gk20a *g);
-void gv11b_perf_enable_membuf(struct gk20a *g, u32 size,
-    u64 buf_addr, struct nvgpu_mem *inst_block);
+void gv11b_perf_enable_membuf(struct gk20a *g, u32 size, u64 buf_addr);
 void gv11b_perf_disable_membuf(struct gk20a *g);
+void gv11b_perf_init_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
+void gv11b_perf_deinit_inst_block(struct gk20a *g);
 u32 gv11b_perf_get_pmm_per_chiplet_offset(void);
 
 #endif /* CONFIG_NVGPU_DEBUGGER */


@@ -784,6 +784,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
     .perfbuf = {
         .perfbuf_enable = vgpu_perfbuffer_enable,
         .perfbuf_disable = vgpu_perfbuffer_disable,
+        .init_inst_block = vgpu_perfbuffer_init_inst_block,
+        .deinit_inst_block = vgpu_perfbuffer_deinit_inst_block,
     },
 #endif
 #ifdef CONFIG_NVGPU_PROFILER


@@ -910,6 +910,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
     .perfbuf = {
         .perfbuf_enable = vgpu_perfbuffer_enable,
         .perfbuf_disable = vgpu_perfbuffer_disable,
+        .init_inst_block = vgpu_perfbuffer_init_inst_block,
+        .deinit_inst_block = vgpu_perfbuffer_deinit_inst_block,
     },
 #endif
 #ifdef CONFIG_NVGPU_PROFILER


@@ -443,9 +443,11 @@ struct gpu_ops {
             bool disable_powergate);
     } debugger;
     struct {
-        void (*enable_membuf)(struct gk20a *g, u32 size,
-            u64 buf_addr, struct nvgpu_mem *inst_block);
+        void (*enable_membuf)(struct gk20a *g, u32 size, u64 buf_addr);
         void (*disable_membuf)(struct gk20a *g);
+        void (*init_inst_block)(struct gk20a *g,
+            struct nvgpu_mem *inst_block);
+        void (*deinit_inst_block)(struct gk20a *g);
         void (*membuf_reset_streaming)(struct gk20a *g);
         u32 (*get_membuf_pending_bytes)(struct gk20a *g);
         void (*set_membuf_handled_bytes)(struct gk20a *g,
@@ -456,6 +458,8 @@ struct gpu_ops {
     struct {
         int (*perfbuf_enable)(struct gk20a *g, u64 offset, u32 size);
        int (*perfbuf_disable)(struct gk20a *g);
+        int (*init_inst_block)(struct gk20a *g);
+        void (*deinit_inst_block)(struct gk20a *g);
     } perfbuf;
 #endif
 #ifdef CONFIG_NVGPU_PROFILER


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -32,5 +32,11 @@ struct gk20a;
 int nvgpu_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size);
 int nvgpu_perfbuf_disable_locked(struct gk20a *g);
 
+int nvgpu_perfbuf_init_vm(struct gk20a *g);
+void nvgpu_perfbuf_deinit_vm(struct gk20a *g);
+
+int nvgpu_perfbuf_init_inst_block(struct gk20a *g);
+void nvgpu_perfbuf_deinit_inst_block(struct gk20a *g);
+
 #endif /* CONFIG_NVGPU_DEBUGGER */
 #endif


@@ -118,6 +118,7 @@ enum {
     TEGRA_VGPU_CMD_GET_TPC_EXCEPTION_EN_STATUS = 87,
     TEGRA_VGPU_CMD_FB_SET_MMU_DEBUG_MODE = 88,
     TEGRA_VGPU_CMD_GR_SET_MMU_DEBUG_MODE = 89,
+    TEGRA_VGPU_CMD_PERFBUF_INST_BLOCK_MGT = 90,
 };
 
 struct tegra_vgpu_connect_params {
@@ -581,6 +582,16 @@ struct tegra_vgpu_perfbuf_mgt_params {
     u32 size;
 };
 
+enum {
+    TEGRA_VGPU_PROF_PERFBUF_INST_BLOCK_INIT = 0,
+    TEGRA_VGPU_PROF_PERFBUF_INST_BLOCK_DEINIT,
+};
+
+struct tegra_vgpu_perfbuf_inst_block_mgt_params {
+    u64 vm_handle;
+    u32 mode;
+};
+
 #define TEGRA_VGPU_GPU_FREQ_TABLE_SIZE 25
 
 struct tegra_vgpu_get_gpu_freq_table_params {
@@ -701,6 +712,7 @@ struct tegra_vgpu_cmd_msg {
         struct tegra_vgpu_get_tpc_exception_en_status_params get_tpc_exception_status;
         struct tegra_vgpu_fb_set_mmu_debug_mode_params fb_set_mmu_debug_mode;
         struct tegra_vgpu_gr_set_mmu_debug_mode_params gr_set_mmu_debug_mode;
+        struct tegra_vgpu_perfbuf_inst_block_mgt_params perfbuf_inst_block_management;
         char padding[184];
     } params;
 };


@@ -31,6 +31,7 @@
 #include <nvgpu/cond.h>
 #include <nvgpu/debugger.h>
 #include <nvgpu/profiler.h>
+#include <nvgpu/perfbuf.h>
 #include <nvgpu/utils.h>
 #include <nvgpu/mm.h>
 #include <nvgpu/gk20a.h>
@@ -1433,7 +1434,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
     struct mm_gk20a *mm = &g->mm;
     int err;
     u32 virt_size;
-    u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 
     nvgpu_mutex_acquire(&g->dbg_sessions_lock);
@@ -1442,14 +1442,10 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
         return -EBUSY;
     }
 
-    mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size,
-            big_page_size << 10,
-            NV_MM_DEFAULT_KERNEL_SIZE,
-            NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-            false, false, false, "perfbuf");
-    if (!mm->perfbuf.vm) {
+    err = nvgpu_perfbuf_init_vm(g);
+    if (err) {
         nvgpu_mutex_release(&g->dbg_sessions_lock);
-        return -ENOMEM;
+        return err;
     }
 
     err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
@@ -1485,7 +1481,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 err_unmap:
     nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL);
 err_remove_vm:
-    nvgpu_vm_put(mm->perfbuf.vm);
+    nvgpu_perfbuf_deinit_vm(g);
     nvgpu_mutex_release(&g->dbg_sessions_lock);
     return err;
 }
@@ -1712,8 +1708,8 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
     err = g->ops.perfbuf.perfbuf_disable(g);
 
     nvgpu_vm_unmap(vm, offset, NULL);
-    nvgpu_free_inst_block(g, &mm->perfbuf.inst_block);
-    nvgpu_vm_put(vm);
+    nvgpu_perfbuf_deinit_vm(g);
 
     g->perfbuf.owner = NULL;
     g->perfbuf.offset = 0;