gpu: nvgpu: vgpu: add b0cc profiler support

- added new commands to bind/unbind hwpm/hwpm_streamout/smpc - added new command to update get/put for PMA buffer - tune function nvgpu_perfbuf_update_get_put so it could be reused on the server side. - enable profiler v2 device for gv11b Jira GVSCI-10351 Signed-off-by: Richard Zhao <rizhao@nvidia.com> Change-Id: I4226c89ec3040e53dee5381ac8a30c9fd598e5ef Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2537683 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com> Reviewed-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 17:36:20 +03:00 · 2021-05-27 10:44:52 -07:00
parent a3c4236574
commit 1685a2404f
11 changed files with 392 additions and 10 deletions
--- a/arch/nvgpu-vgpu.yaml
+++ b/arch/nvgpu-vgpu.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2019, NVIDIA CORPORATION.  All Rights Reserved.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.  All Rights Reserved.
 #
 # vGPU architecture: currently there hasn't been much work done on
 # decomposing the vGPU architecture so all of the vGPU files are simply
@@ -67,6 +67,8 @@ all:
             common/vgpu/ptimer/ptimer_vgpu.h,
             common/vgpu/init/init_vgpu.c,
             common/vgpu/init/init_vgpu.h,
+             common/vgpu/profiler/profiler_vgpu.c,
+             common/vgpu/profiler/profiler_vgpu.h,
             include/nvgpu/vgpu/tegra_vgpu.h,
             include/nvgpu/vgpu/vgpu.h,
             include/nvgpu/vgpu/vgpu_ivc.h,
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -622,6 +622,7 @@ nvgpu-$(CONFIG_NVGPU_GR_VIRTUALIZATION) += \
 	common/vgpu/debugger_vgpu.o \
 	common/vgpu/pm_reservation_vgpu.o \
 	common/vgpu/perf/perf_vgpu.o \
+	common/vgpu/profiler/profiler_vgpu.o \
 	common/vgpu/ecc_vgpu.o \
 	common/vgpu/clk_vgpu.o \
 	common/vgpu/gr/fecs_trace_vgpu.o \
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -569,6 +569,7 @@ srcs += common/vgpu/init/init_vgpu.c \
 	common/vgpu/fifo/runlist_vgpu.c \
 	common/vgpu/fifo/ramfc_vgpu.c \
 	common/vgpu/perf/perf_vgpu.c \
+	common/vgpu/profiler/profiler_vgpu.c \
 	common/vgpu/mm/mm_vgpu.c \
 	common/vgpu/mm/vm_vgpu.c \
 	common/vgpu/gr/gr_vgpu.c \
--- a/drivers/gpu/nvgpu/common/perf/perfbuf.c
+++ b/drivers/gpu/nvgpu/common/perf/perfbuf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -152,7 +152,7 @@ int nvgpu_perfbuf_update_get_put(struct gk20a *g, u64 bytes_consumed,
 	bool update_available_bytes = (bytes_available == NULL) ? false : true;
 	volatile u32 *available_bytes_va = (u32 *)cpuva;

-	if (update_available_bytes) {
+	if (update_available_bytes && available_bytes_va != NULL) {
 		*available_bytes_va = 0xffffffff;
 	}

@@ -162,7 +162,7 @@ int nvgpu_perfbuf_update_get_put(struct gk20a *g, u64 bytes_consumed,
 		return err;
 	}

-	if (update_available_bytes && wait) {
+	if (update_available_bytes && wait && available_bytes_va != NULL) {
 		err = nvgpu_timeout_init(g, &timeout, 10000, NVGPU_TIMER_CPU_TIMER);
 		if (err != 0) {
 			nvgpu_err(g, "nvgpu_timeout_init() failed err=%d", err);
@@ -178,6 +178,7 @@ int nvgpu_perfbuf_update_get_put(struct gk20a *g, u64 bytes_consumed,
 		} while (nvgpu_timeout_expired(&timeout) == 0);

 		if (*available_bytes_va == 0xffffffff) {
+			nvgpu_err(g, "perfbuf update get put timed out");
 			return -ETIMEDOUT;
 		}

--- a/drivers/gpu/nvgpu/common/profiler/profiler.c
+++ b/drivers/gpu/nvgpu/common/profiler/profiler.c
@@ -458,12 +458,10 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_resident(struct gk20a *g,
 	/* Disable streamout */
 	g->ops.perf.pma_stream_enable(g, false);

-	if (pma_bytes_available_buffer_cpuva != NULL) {
-		/* wait for all the inflight records from fb-hub to stream out */
-		err = nvgpu_perfbuf_update_get_put(g, 0U, &bytes_available,
+	/* wait for all the inflight records from fb-hub to stream out */
+	err = nvgpu_perfbuf_update_get_put(g, 0U, &bytes_available,
 				pma_bytes_available_buffer_cpuva, true,
 				NULL, NULL);
-	}

 fail:
 	if (err != 0) {
--- a/drivers/gpu/nvgpu/common/vgpu/perf/perf_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/perf/perf_vgpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -90,3 +90,33 @@ void vgpu_perfbuffer_deinit_inst_block(struct gk20a *g)
 	vgpu_sendrecv_perfbuf_inst_block_cmd(g,
 			TEGRA_VGPU_PROF_PERFBUF_INST_BLOCK_DEINIT);
 }
+
+/*
+ * Forward a PMA buffer get/put update to the vGPU server.
+ *
+ * Sends TEGRA_VGPU_CMD_PERF_UPDATE_GET_PUT carrying bytes_consumed and the
+ * update_available_bytes flag.
+ *
+ * Returns the vgpu_comm_sendrecv() error if the exchange failed, otherwise
+ * the status carried in the reply. Only when that status is 0 are the reply's
+ * put pointer and overflow flag copied out, and only to those of put_ptr /
+ * overflowed that are non-NULL.
+ */
+int vgpu_perf_update_get_put(struct gk20a *g, u64 bytes_consumed,
+		bool update_available_bytes, u64 *put_ptr,
+		bool *overflowed)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_perf_update_get_put_params *params =
+				&msg.params.perf_updat_get_put;
+	int ret;
+
+	msg.cmd = TEGRA_VGPU_CMD_PERF_UPDATE_GET_PUT;
+	msg.handle = vgpu_get_handle(g);
+
+	params->update_available_bytes = (u8)update_available_bytes;
+	params->bytes_consumed = bytes_consumed;
+
+	ret = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (ret == 0) {
+		/* Exchange succeeded; the reply status decides the outcome. */
+		ret = msg.ret;
+	}
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (put_ptr != NULL) {
+		*put_ptr = params->put_ptr;
+	}
+	if (overflowed != NULL) {
+		*overflowed = (bool)params->overflowed;
+	}
+
+	return 0;
+}
--- a/drivers/gpu/nvgpu/common/vgpu/perf/perf_vgpu.h
+++ b/drivers/gpu/nvgpu/common/vgpu/perf/perf_vgpu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -20,6 +20,9 @@
 * DEALINGS IN THE SOFTWARE.
 */

+#ifndef NVGPU_PERF_VGPU_H
+#define NVGPU_PERF_VGPU_H
+
 struct gk20a;

 int vgpu_perfbuffer_enable(struct gk20a *g, u64 offset, u32 size);
@@ -27,3 +30,9 @@ int vgpu_perfbuffer_disable(struct gk20a *g);

 int vgpu_perfbuffer_init_inst_block(struct gk20a *g);
 void vgpu_perfbuffer_deinit_inst_block(struct gk20a *g);
+
+int vgpu_perf_update_get_put(struct gk20a *g, u64 bytes_consumed,
+		bool update_available_bytes, u64 *put_ptr,
+		bool *overflowed);
+
+#endif
--- a/drivers/gpu/nvgpu/common/vgpu/profiler/profiler_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/profiler/profiler_vgpu.c
@@ -0,0 +1,200 @@
+/*
+ * Tegra GPU Virtualization Interfaces to Server
+ *
+ * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/tsg.h>
+#include <nvgpu/pm_reservation.h>
+#include <nvgpu/vgpu/vgpu.h>
+#include <nvgpu/vgpu/tegra_vgpu.h>
+#include <nvgpu/gr/ctx.h>
+#include <nvgpu/gk20a.h>
+
+#include "common/vgpu/ivc/comm_vgpu.h"
+#include "profiler_vgpu.h"
+
+/*
+ * Ask the vGPU server to bind HWPM.
+ *
+ * Only GR instance 0 is accepted (asserted). When is_ctxsw is set, the HWPM
+ * context-switch mode is first changed to NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW
+ * via g->ops.gr.update_hwpm_ctxsw_mode(); if that fails its error is returned
+ * and no request is sent. Otherwise TEGRA_VGPU_CMD_PROF_BIND_UNBIND is sent
+ * with subcommand TEGRA_VGPU_PROF_BIND_HWPM. A NULL tsg is encoded as
+ * NVGPU_INVALID_TSG_ID in the request.
+ *
+ * Returns the vgpu_comm_sendrecv() error if the exchange failed, otherwise
+ * the status carried in the reply.
+ */
+int vgpu_profiler_bind_hwpm(struct gk20a *g,
+		u32 gr_instance_id,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_prof_bind_unbind_params *params =
+			&msg.params.prof_bind_unbind;
+	int ret;
+
+	nvgpu_assert(gr_instance_id == 0U);
+
+	if (is_ctxsw) {
+		ret = g->ops.gr.update_hwpm_ctxsw_mode(g, gr_instance_id,
+				tsg, 0, NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND;
+	msg.handle = vgpu_get_handle(g);
+
+	params->subcmd = TEGRA_VGPU_PROF_BIND_HWPM;
+	params->is_ctxsw = is_ctxsw;
+	if (tsg != NULL) {
+		params->tsg_id = tsg->tsgid;
+	} else {
+		params->tsg_id = NVGPU_INVALID_TSG_ID;
+	}
+
+	ret = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (ret != 0) {
+		return ret;
+	}
+
+	return msg.ret;
+}
+
+/*
+ * Ask the vGPU server to unbind HWPM.
+ *
+ * Only GR instance 0 is accepted (asserted). Sends
+ * TEGRA_VGPU_CMD_PROF_BIND_UNBIND with subcommand
+ * TEGRA_VGPU_PROF_UNBIND_HWPM; is_ctxsw is forwarded as-is and a NULL tsg is
+ * encoded as NVGPU_INVALID_TSG_ID. No context-switch mode change is made
+ * locally in this function.
+ *
+ * Returns the vgpu_comm_sendrecv() error if the exchange failed, otherwise
+ * the status carried in the reply.
+ */
+int vgpu_profiler_unbind_hwpm(struct gk20a *g,
+		u32 gr_instance_id,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_prof_bind_unbind_params *params =
+			&msg.params.prof_bind_unbind;
+	int ret;
+
+	nvgpu_assert(gr_instance_id == 0U);
+
+	msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND;
+	msg.handle = vgpu_get_handle(g);
+
+	params->subcmd = TEGRA_VGPU_PROF_UNBIND_HWPM;
+	params->is_ctxsw = is_ctxsw;
+	if (tsg != NULL) {
+		params->tsg_id = tsg->tsgid;
+	} else {
+		params->tsg_id = NVGPU_INVALID_TSG_ID;
+	}
+
+	ret = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (ret != 0) {
+		return ret;
+	}
+
+	return msg.ret;
+}
+
+/*
+ * Ask the vGPU server to bind HWPM in stream-out mode.
+ *
+ * Only GR instance 0 is accepted (asserted). When is_ctxsw is set, the HWPM
+ * context-switch mode is first changed to
+ * NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW via
+ * g->ops.gr.update_hwpm_ctxsw_mode(); if that fails its error is returned and
+ * no request is sent. Otherwise TEGRA_VGPU_CMD_PROF_BIND_UNBIND is sent with
+ * subcommand TEGRA_VGPU_PROF_BIND_HWPM_STREAMOUT, carrying the PMA buffer
+ * address and size and the address of the bytes-available buffer exactly as
+ * passed in. A NULL tsg is encoded as NVGPU_INVALID_TSG_ID.
+ *
+ * Returns the vgpu_comm_sendrecv() error if the exchange failed, otherwise
+ * the status carried in the reply.
+ */
+int vgpu_profiler_bind_hwpm_streamout(struct gk20a *g,
+		u32 gr_instance_id,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg,
+		u64 pma_buffer_va,
+		u32 pma_buffer_size,
+		u64 pma_bytes_available_buffer_va)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_prof_bind_unbind_params *params =
+			&msg.params.prof_bind_unbind;
+	int ret;
+
+	nvgpu_assert(gr_instance_id == 0U);
+
+	if (is_ctxsw) {
+		ret = g->ops.gr.update_hwpm_ctxsw_mode(g, gr_instance_id,
+				tsg, 0,
+				NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND;
+	msg.handle = vgpu_get_handle(g);
+
+	params->subcmd = TEGRA_VGPU_PROF_BIND_HWPM_STREAMOUT;
+	params->is_ctxsw = is_ctxsw;
+	if (tsg != NULL) {
+		params->tsg_id = tsg->tsgid;
+	} else {
+		params->tsg_id = NVGPU_INVALID_TSG_ID;
+	}
+
+	/* Stream-out buffer description, forwarded unchanged. */
+	params->pma_buffer_size = pma_buffer_size;
+	params->pma_buffer_va = pma_buffer_va;
+	params->pma_bytes_available_buffer_va = pma_bytes_available_buffer_va;
+
+	ret = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (ret != 0) {
+		return ret;
+	}
+
+	return msg.ret;
+}
+
+/*
+ * Ask the vGPU server to unbind HWPM stream-out.
+ *
+ * Only GR instance 0 is accepted (asserted). Sends
+ * TEGRA_VGPU_CMD_PROF_BIND_UNBIND with subcommand
+ * TEGRA_VGPU_PROF_UNBIND_HWPM_STREAMOUT, forwarding is_ctxsw and
+ * smpc_reserved. A NULL tsg is encoded as NVGPU_INVALID_TSG_ID.
+ *
+ * pma_bytes_available_buffer_cpuva is accepted for signature compatibility
+ * but is not used by this implementation.
+ *
+ * Returns the vgpu_comm_sendrecv() error if the exchange failed, otherwise
+ * the status carried in the reply.
+ */
+int vgpu_profiler_unbind_hwpm_streamout(struct gk20a *g,
+		u32 gr_instance_id,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg,
+		void *pma_bytes_available_buffer_cpuva,
+		bool smpc_reserved)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_prof_bind_unbind_params *p =
+			&msg.params.prof_bind_unbind;
+	int err;
+
+	/*
+	 * Nothing derived from the CPU mapping goes into the request, so
+	 * discard it explicitly to mark the unused parameter as intentional.
+	 */
+	(void)pma_bytes_available_buffer_cpuva;
+
+	nvgpu_assert(gr_instance_id == 0U);
+
+	msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND;
+	msg.handle = vgpu_get_handle(g);
+
+	p->subcmd = TEGRA_VGPU_PROF_UNBIND_HWPM_STREAMOUT;
+	p->is_ctxsw = is_ctxsw;
+	p->tsg_id = tsg != NULL ? tsg->tsgid : NVGPU_INVALID_TSG_ID;
+	p->smpc_reserved = (u8)smpc_reserved;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	/* A successful exchange still reports the server's own status. */
+	err = err ? err : msg.ret;
+	return err;
+}
+
+/*
+ * Ask the vGPU server to bind SMPC.
+ *
+ * Only GR instance 0 is accepted (asserted). Sends
+ * TEGRA_VGPU_CMD_PROF_BIND_UNBIND with subcommand TEGRA_VGPU_PROF_BIND_SMPC;
+ * is_ctxsw is forwarded as-is and a NULL tsg is encoded as
+ * NVGPU_INVALID_TSG_ID.
+ *
+ * Returns the vgpu_comm_sendrecv() error if the exchange failed, otherwise
+ * the status carried in the reply.
+ */
+int vgpu_profiler_bind_smpc(struct gk20a *g,
+		u32 gr_instance_id,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_prof_bind_unbind_params *params =
+			&msg.params.prof_bind_unbind;
+	int ret;
+
+	nvgpu_assert(gr_instance_id == 0U);
+
+	msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND;
+	msg.handle = vgpu_get_handle(g);
+
+	params->subcmd = TEGRA_VGPU_PROF_BIND_SMPC;
+	params->is_ctxsw = is_ctxsw;
+	if (tsg != NULL) {
+		params->tsg_id = tsg->tsgid;
+	} else {
+		params->tsg_id = NVGPU_INVALID_TSG_ID;
+	}
+
+	ret = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (ret != 0) {
+		return ret;
+	}
+
+	return msg.ret;
+}
+
+/*
+ * Ask the vGPU server to unbind SMPC.
+ *
+ * Sends TEGRA_VGPU_CMD_PROF_BIND_UNBIND with subcommand
+ * TEGRA_VGPU_PROF_UNBIND_SMPC; is_ctxsw is forwarded as-is and a NULL tsg is
+ * encoded as NVGPU_INVALID_TSG_ID. Unlike the other bind/unbind calls in this
+ * file, this one takes no GR instance id.
+ *
+ * Returns the vgpu_comm_sendrecv() error if the exchange failed, otherwise
+ * the status carried in the reply.
+ */
+int vgpu_profiler_unbind_smpc(struct gk20a *g,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_prof_bind_unbind_params *params =
+			&msg.params.prof_bind_unbind;
+	int ret;
+
+	msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND;
+	msg.handle = vgpu_get_handle(g);
+
+	params->subcmd = TEGRA_VGPU_PROF_UNBIND_SMPC;
+	params->is_ctxsw = is_ctxsw;
+	if (tsg != NULL) {
+		params->tsg_id = tsg->tsgid;
+	} else {
+		params->tsg_id = NVGPU_INVALID_TSG_ID;
+	}
+
+	ret = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (ret != 0) {
+		return ret;
+	}
+
+	return msg.ret;
+}
--- a/drivers/gpu/nvgpu/common/vgpu/profiler/profiler_vgpu.h
+++ b/drivers/gpu/nvgpu/common/vgpu/profiler/profiler_vgpu.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NVGPU_PROFILER_VGPU_H
+#define NVGPU_PROFILER_VGPU_H
+
+/*
+ * Forward declarations so the prototypes below refer to the file-scope struct
+ * tags even when this header is included before their definitions; without
+ * them the tags would be declared with prototype scope only.
+ *
+ * NOTE(review): u32, u64 and bool must already be visible to the includer;
+ * this header does not pull in a types header itself.
+ */
+struct gk20a;
+struct nvgpu_tsg;
+
+/* Request an HWPM bind from the vGPU server. Returns 0 or an error code. */
+int vgpu_profiler_bind_hwpm(struct gk20a *g,
+		u32 gr_instance_id,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg);
+
+/* Request an HWPM unbind from the vGPU server. Returns 0 or an error code. */
+int vgpu_profiler_unbind_hwpm(struct gk20a *g,
+		u32 gr_instance_id,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg);
+
+/*
+ * Request an HWPM stream-out bind from the vGPU server, passing the PMA
+ * buffer address/size and the bytes-available buffer address.
+ * Returns 0 or an error code.
+ */
+int vgpu_profiler_bind_hwpm_streamout(struct gk20a *g,
+		u32 gr_instance_id,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg,
+		u64 pma_buffer_va,
+		u32 pma_buffer_size,
+		u64 pma_bytes_available_buffer_va);
+
+/*
+ * Request an HWPM stream-out unbind from the vGPU server.
+ * Returns 0 or an error code.
+ */
+int vgpu_profiler_unbind_hwpm_streamout(struct gk20a *g,
+		u32 gr_instance_id,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg,
+		void *pma_bytes_available_buffer_cpuva,
+		bool smpc_reserved);
+
+/* Request an SMPC bind from the vGPU server. Returns 0 or an error code. */
+int vgpu_profiler_bind_smpc(struct gk20a *g,
+		u32 gr_instance_id,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg);
+
+/* Request an SMPC unbind from the vGPU server. Returns 0 or an error code. */
+int vgpu_profiler_unbind_smpc(struct gk20a *g,
+		bool is_ctxsw,
+		struct nvgpu_tsg *tsg);
+
+#endif /* NVGPU_PROFILER_VGPU_H */
--- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
@@ -85,6 +85,11 @@
 #include "hal/init/hal_gv11b.h"
 #include "hal/init/hal_gv11b_litter.h"
 #include "hal/fifo/channel_gv11b.h"
+#ifdef CONFIG_NVGPU_DEBUGGER
+#include "hal/regops/regops_gv11b.h"
+#include "hal/regops/allowlist_gv11b.h"
+#endif
+#include "hal/ptimer/ptimer_gv11b.h"

 #include "hal/vgpu/fifo/fifo_gv11b_vgpu.h"
 #include "hal/vgpu/sync/syncpt_cmdbuf_gv11b_vgpu.h"
@@ -109,6 +114,7 @@
 #include <nvgpu/therm.h>
 #include <nvgpu/clk_arb.h>
 #include <nvgpu/grmgr.h>
+#include <nvgpu/perfbuf.h>

 #include "common/vgpu/init/init_vgpu.h"
 #include "common/vgpu/fb/fb_vgpu.h"
@@ -132,6 +138,7 @@
 #include "common/vgpu/gr/fecs_trace_vgpu.h"
 #include "common/vgpu/perf/cyclestats_snapshot_vgpu.h"
 #include "common/vgpu/ptimer/ptimer_vgpu.h"
+#include "common/vgpu/profiler/profiler_vgpu.h"
 #include "vgpu_hal_gv11b.h"

 #include <nvgpu/debugger.h>
@@ -835,6 +842,29 @@ static const struct gops_regops vgpu_gv11b_ops_regops = {
 	.get_context_whitelist_ranges_count = gv11b_get_context_whitelist_ranges_count,
 	.get_runcontrol_whitelist = gv11b_get_runcontrol_whitelist,
 	.get_runcontrol_whitelist_count = gv11b_get_runcontrol_whitelist_count,
+	.get_hwpm_perfmon_register_stride = gv11b_get_hwpm_perfmon_register_stride,
+	.get_hwpm_router_register_stride = gv11b_get_hwpm_router_register_stride,
+	.get_hwpm_pma_channel_register_stride = gv11b_get_hwpm_pma_channel_register_stride,
+	.get_hwpm_pma_trigger_register_stride = gv11b_get_hwpm_pma_trigger_register_stride,
+	.get_smpc_register_stride = gv11b_get_smpc_register_stride,
+	.get_cau_register_stride = NULL,
+	.get_hwpm_perfmon_register_offset_allowlist =
+		gv11b_get_hwpm_perfmon_register_offset_allowlist,
+	.get_hwpm_router_register_offset_allowlist =
+		gv11b_get_hwpm_router_register_offset_allowlist,
+	.get_hwpm_pma_channel_register_offset_allowlist =
+		gv11b_get_hwpm_pma_channel_register_offset_allowlist,
+	.get_hwpm_pma_trigger_register_offset_allowlist =
+		gv11b_get_hwpm_pma_trigger_register_offset_allowlist,
+	.get_smpc_register_offset_allowlist = gv11b_get_smpc_register_offset_allowlist,
+	.get_cau_register_offset_allowlist = NULL,
+	.get_hwpm_perfmon_register_ranges = gv11b_get_hwpm_perfmon_register_ranges,
+	.get_hwpm_router_register_ranges = gv11b_get_hwpm_router_register_ranges,
+	.get_hwpm_pma_channel_register_ranges = gv11b_get_hwpm_pma_channel_register_ranges,
+	.get_hwpm_pma_trigger_register_ranges = gv11b_get_hwpm_pma_trigger_register_ranges,
+	.get_smpc_register_ranges = gv11b_get_smpc_register_ranges,
+	.get_cau_register_ranges = NULL,
+	.get_hwpm_perfmux_register_ranges = gv11b_get_hwpm_perfmux_register_ranges,
 };
 #endif

@@ -879,6 +909,7 @@ static const struct gops_perf vgpu_gv11b_ops_perf = {
 	.get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset,
 	.get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset,
 	.get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset,
+	.update_get_put = vgpu_perf_update_get_put,
 };
 #endif

@@ -888,6 +919,7 @@ static const struct gops_perfbuf vgpu_gv11b_ops_perfbuf = {
 	.perfbuf_disable = vgpu_perfbuffer_disable,
 	.init_inst_block = vgpu_perfbuffer_init_inst_block,
 	.deinit_inst_block = vgpu_perfbuffer_deinit_inst_block,
+	.update_get_put = nvgpu_perfbuf_update_get_put,
 };
 #endif

@@ -899,6 +931,17 @@ static const struct gops_pm_reservation vgpu_gv11b_ops_pm_reservation = {
 };
 #endif

+#ifdef CONFIG_NVGPU_PROFILER
+/*
+ * Profiler HAL table for vGPU gv11b: every hook is wired to the matching
+ * vgpu_profiler_* function.
+ */
+static const struct gops_profiler vgpu_gv11b_ops_profiler = {
+	/* bind hooks */
+	.bind_hwpm = vgpu_profiler_bind_hwpm,
+	.bind_hwpm_streamout = vgpu_profiler_bind_hwpm_streamout,
+	.bind_smpc = vgpu_profiler_bind_smpc,
+	/* unbind hooks */
+	.unbind_hwpm = vgpu_profiler_unbind_hwpm,
+	.unbind_hwpm_streamout = vgpu_profiler_unbind_hwpm_streamout,
+	.unbind_smpc = vgpu_profiler_unbind_smpc,
+};
+#endif
+
 static const struct gops_bus vgpu_gv11b_ops_bus = {
 	.init_hw = NULL,
 	.isr = NULL,
@@ -915,6 +958,9 @@ static const struct gops_ptimer vgpu_gv11b_ops_ptimer = {
 #ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
 	.get_timestamps_zipper = vgpu_get_timestamps_zipper,
 #endif
+#ifdef CONFIG_NVGPU_PROFILER
+	.get_timer_reg_offsets = gv11b_ptimer_get_timer_reg_offsets,
+#endif
 };

 #if defined(CONFIG_NVGPU_CYCLESTATS)
@@ -1051,6 +1097,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g)
 #endif
 #ifdef CONFIG_NVGPU_PROFILER
 	gops->pm_reservation = vgpu_gv11b_ops_pm_reservation;
+	gops->profiler = vgpu_gv11b_ops_profiler;
 #endif
 	gops->bus = vgpu_gv11b_ops_bus;
 	gops->ptimer = vgpu_gv11b_ops_ptimer;
@@ -1071,6 +1118,10 @@ int vgpu_gv11b_init_hal(struct gk20a *g)
 #ifdef CONFIG_NVGPU_FECS_TRACE
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
 #endif
+#ifdef CONFIG_NVGPU_PROFILER
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_PROFILER_V2_DEVICE, true);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_PROFILER_V2_CONTEXT, false);
+#endif

 	/* Lone functions */
 	gops->chip_init_gpu_characteristics = vgpu_gv11b_init_gpu_characteristics;
--- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
@@ -121,6 +121,8 @@ enum {
 	TEGRA_VGPU_CMD_TSG_SET_LONG_TIMESLICE = 91,
 	TEGRA_VGPU_CMD_TSG_GET_L2_MAX_WAYS_EVICT_LAST = 92,
 	TEGRA_VGPU_CMD_TSG_SET_L2_MAX_WAYS_EVICT_LAST = 93,
+	TEGRA_VGPU_CMD_PROF_BIND_UNBIND = 94,
+	TEGRA_VGPU_CMD_PERF_UPDATE_GET_PUT = 95,
 };

 struct tegra_vgpu_connect_params {
@@ -658,6 +660,32 @@ struct tegra_vgpu_l2_max_ways_evict_last_params {
 	u32 num_ways;
 };

+enum {	/* subcmd values carried by TEGRA_VGPU_CMD_PROF_BIND_UNBIND */
+	TEGRA_VGPU_PROF_BIND_HWPM = 0,	/* sent by vgpu_profiler_bind_hwpm() */
+	TEGRA_VGPU_PROF_UNBIND_HWPM = 1,	/* sent by vgpu_profiler_unbind_hwpm() */
+	TEGRA_VGPU_PROF_BIND_HWPM_STREAMOUT = 2,	/* sent by vgpu_profiler_bind_hwpm_streamout() */
+	TEGRA_VGPU_PROF_UNBIND_HWPM_STREAMOUT = 3,	/* sent by vgpu_profiler_unbind_hwpm_streamout() */
+	TEGRA_VGPU_PROF_BIND_SMPC = 4,	/* sent by vgpu_profiler_bind_smpc() */
+	TEGRA_VGPU_PROF_UNBIND_SMPC = 5,	/* sent by vgpu_profiler_unbind_smpc() */
+};
+
+struct tegra_vgpu_prof_bind_unbind_params {	/* payload of TEGRA_VGPU_CMD_PROF_BIND_UNBIND */
+	u32 subcmd;	/* one of the TEGRA_VGPU_PROF_* subcommand values */
+	u8 is_ctxsw;	/* caller's is_ctxsw flag, stored as a byte */
+	u8 smpc_reserved;	/* only filled in for UNBIND_HWPM_STREAMOUT; NOTE(review): 2 bytes of implicit padding likely follow - confirm server layout */
+	u32 tsg_id;	/* tsg->tsgid, or NVGPU_INVALID_TSG_ID when no TSG is given */
+	u32 pma_buffer_size;	/* only filled in for BIND_HWPM_STREAMOUT */
+	u64 pma_buffer_va;	/* only filled in for BIND_HWPM_STREAMOUT */
+	u64 pma_bytes_available_buffer_va;	/* only filled in for BIND_HWPM_STREAMOUT */
+};
+
+struct tegra_vgpu_perf_update_get_put_params {	/* payload of TEGRA_VGPU_CMD_PERF_UPDATE_GET_PUT */
+	u64 bytes_consumed;	/* request: filled in by the client before sending */
+	u64 put_ptr;	/* reply: read back by the client after a successful call */
+	u8 update_available_bytes;	/* request: caller's bool flag, stored as a byte */
+	u8 overflowed;	/* reply: read back by the client and converted to bool */
+};
+
 struct tegra_vgpu_cmd_msg {
 	u32 cmd;
 	int ret;
@@ -725,6 +753,8 @@ struct tegra_vgpu_cmd_msg {
 		struct tegra_vgpu_gr_set_mmu_debug_mode_params gr_set_mmu_debug_mode;
 		struct tegra_vgpu_perfbuf_inst_block_mgt_params perfbuf_inst_block_management;
 		struct tegra_vgpu_l2_max_ways_evict_last_params l2_max_ways_evict_last;
+		struct tegra_vgpu_prof_bind_unbind_params prof_bind_unbind;
+		struct tegra_vgpu_perf_update_get_put_params perf_updat_get_put;
 		char padding[184];
 	} params;
 };