From 869735cda475c050a561e447ebbed59096d57f11 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 5 Nov 2020 17:46:44 +0530
Subject: [PATCH] gpu: nvgpu: add dynamic allowlist support

Add gv11b and tu104 HALs to get allowed HWPM resource register ranges,
offsets, and stride metadata.

Add new enum nvgpu_pm_resource_hwpm_register_type for HWPM register
type. Add new struct nvgpu_pm_resource_register_range_map to store all
the register ranges for HWPM resources. Add pointer of map in struct
nvgpu_profiler_object along with map entry count.

Add new API nvgpu_profiler_build_regops_allowlist() to build the regops
allowlist dynamically while binding the resources. Map entry count is
received with get_pm_resource_register_range_map_entry_count() and only
those resource ranges are added for which resource is reserved by
profiler object.

Add nvgpu_profiler_destroy_regops_allowlist() to destroy the allowlist
while unbinding the resources.

Add static function allowlist_range_search() to search for a register
offset in the HWPM resource ranges. Add another static function
allowlist_offset_search() to search for the offset in the per-resource
offset list.

Add nvgpu_profiler_validate_regops_allowlist() that accepts an offset
value, checks if it is in allowed ranges using allowlist_range_search()
and then checks if offset is in allowlist using allowlist_offset_search().

Update gops.regops.exec_regops() to receive profiler object pointer as
a parameter.

Invoke nvgpu_profiler_validate_regops_allowlist() from
validate_reg_ops() if the prof pointer is non-NULL. This is the case only
for the new profiler stack, not for legacy profilers.

In gr_exec_ctx_ops(), skip regops execution if offset is invalid.

Bug 2510974
Jira NVGPU-5360

Change-Id: I40acb91cc37508629c83106ea15b062250bba473
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2460001
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/common/profiler/profiler.c  | 304 ++++++++++++++++++
 drivers/gpu/nvgpu/common/regops/regops.c      |  53 ++-
 drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c |   1 +
 drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h |   1 +
 drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c        |   9 +
 drivers/gpu/nvgpu/hal/init/hal_gv11b.c        |  24 ++
 drivers/gpu/nvgpu/hal/init/hal_tu104.c        |  24 ++
 .../gpu/nvgpu/include/nvgpu/gops/debugger.h   |  27 ++
 drivers/gpu/nvgpu/include/nvgpu/profiler.h    |  14 +
 drivers/gpu/nvgpu/include/nvgpu/regops.h      |   2 +
 .../nvgpu/include/nvgpu/regops_allowlist.h    |  16 +
 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c        |   2 +-
 drivers/gpu/nvgpu/os/linux/ioctl_prof.c       |   2 +-
 13 files changed, 472 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c
index 43b7a5592..7a34e07e7 100644
--- a/drivers/gpu/nvgpu/common/profiler/profiler.c
+++ b/drivers/gpu/nvgpu/common/profiler/profiler.c
@@ -32,6 +32,11 @@
 #include <nvgpu/gr/ctx.h>
 #include <nvgpu/perfbuf.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/regops_allowlist.h>
+#include <nvgpu/sort.h>
+
+static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof);
+static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof);
 
 static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
 static int generate_unique_id(void)
@@ -576,8 +581,19 @@ int nvgpu_profiler_bind_pm_resources(struct nvgpu_profiler_object *prof)
 			"SMPC bound with profiler handle %u", prof->prof_handle);
 	}
 
+	err = nvgpu_profiler_build_regops_allowlist(prof);
+	if (err != 0) {
+		nvgpu_err(g, "failed to build allowlist");
+		goto fail_unbind;
+	}
+
 	prof->bound = true;
 
+	gk20a_idle(g);
+	return 0;
+
+fail_unbind:
+	nvgpu_profiler_unbind_pm_resources(prof);
 fail:
 	gk20a_idle(g);
 	return err;
@@ -594,6 +610,8 @@ int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof)
 		return -EINVAL;
 	}
 
+	nvgpu_profiler_destroy_regops_allowlist(prof);
+
 	err = gk20a_busy(g);
 	if (err) {
 		nvgpu_err(g, "failed to poweron");
@@ -681,3 +699,289 @@ void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof)
 				NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
 	}
 }
+
+/* sort() comparator: orders range map entries by ascending start offset. */
+static int map_cmp(const void *a, const void *b)
+{
+	const struct nvgpu_pm_resource_register_range_map *lhs = a;
+	const struct nvgpu_pm_resource_register_range_map *rhs = b;
+	int order;
+
+	/* Only the start offsets are compared; end and type play no part. */
+	if (lhs->start < rhs->start) {
+		order = -1;
+	} else if (lhs->start > rhs->start) {
+		order = 1;
+	} else {
+		order = 0;
+	}
+
+	return order;
+}
+
+/* Count the range map entries needed for the resources prof has reserved. */
+static u32 get_pm_resource_register_range_map_entry_count(struct nvgpu_profiler_object *prof)
+{
+	struct gk20a *g = prof->g;
+	u32 total = 0U;
+	u32 num;
+
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) {
+		g->ops.regops.get_smpc_register_ranges(&num);
+		total += num;
+	}
+
+	/* HWPM_LEGACY covers the perfmon, router, PMA trigger and perfmux ranges. */
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
+		g->ops.regops.get_hwpm_perfmon_register_ranges(&num);
+		total += num;
+		g->ops.regops.get_hwpm_router_register_ranges(&num);
+		total += num;
+		g->ops.regops.get_hwpm_pma_trigger_register_ranges(&num);
+		total += num;
+		g->ops.regops.get_hwpm_perfmux_register_ranges(&num);
+		total += num;
+
+		/* The CAU HAL is optional (gv11b leaves it NULL); skip it when absent. */
+		if (g->ops.regops.get_cau_register_ranges != NULL) {
+			g->ops.regops.get_cau_register_ranges(&num);
+			total += num;
+		}
+	}
+
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) {
+		g->ops.regops.get_hwpm_pma_channel_register_ranges(&num);
+		total += num;
+	}
+
+	return total;
+}
+
+/* Append range_count entries tagged with type to map at *map_index, advancing it. */
+static void add_range_to_map(const struct nvgpu_pm_resource_register_range *range,
+		u32 range_count, struct nvgpu_pm_resource_register_range_map *map,
+		u32 *map_index, enum nvgpu_pm_resource_hwpm_register_type type)
+{
+	struct nvgpu_pm_resource_register_range_map *dst = &map[*map_index];
+	u32 n;
+
+	/* No bounds check here: map must have room for range_count more entries. */
+	for (n = 0U; n < range_count; n++) {
+		dst[n].start = range[n].start;
+		dst[n].end = range[n].end;
+		dst[n].type = type;
+	}
+	*map_index += range_count;
+}
+
+/*
+ * Build the sorted register range map for the resources reserved by prof and
+ * publish it in prof->map / prof->map_count. Returns 0, -EINVAL or -ENOMEM.
+ */
+static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof)
+{
+	struct gk20a *g = prof->g;
+	struct nvgpu_pm_resource_register_range_map *table;
+	const struct nvgpu_pm_resource_register_range *src;
+	u32 total, filled = 0U;
+	u32 num;
+	u32 idx;
+
+	total = get_pm_resource_register_range_map_entry_count(prof);
+	if (total == 0U) {
+		return -EINVAL;
+	}
+
+	nvgpu_log(g, gpu_dbg_prof, "Allowlist map number of entries %u for handle %u",
+		total, prof->prof_handle);
+
+	table = nvgpu_kzalloc(g, sizeof(*table) * total);
+	if (table == NULL) {
+		return -ENOMEM;
+	}
+
+	/* Copy in the ranges of every reserved resource, tagging each with its type. */
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) {
+		src = g->ops.regops.get_smpc_register_ranges(&num);
+		add_range_to_map(src, num, table, &filled,
+			NVGPU_HWPM_REGISTER_TYPE_SMPC);
+	}
+
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
+		src = g->ops.regops.get_hwpm_perfmon_register_ranges(&num);
+		add_range_to_map(src, num, table, &filled,
+			NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON);
+		src = g->ops.regops.get_hwpm_router_register_ranges(&num);
+		add_range_to_map(src, num, table, &filled,
+			NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER);
+		src = g->ops.regops.get_hwpm_pma_trigger_register_ranges(&num);
+		add_range_to_map(src, num, table, &filled,
+			NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER);
+		src = g->ops.regops.get_hwpm_perfmux_register_ranges(&num);
+		add_range_to_map(src, num, table, &filled,
+			NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX);
+		if (g->ops.regops.get_cau_register_ranges != NULL) {
+			src = g->ops.regops.get_cau_register_ranges(&num);
+			add_range_to_map(src, num, table, &filled,
+				NVGPU_HWPM_REGISTER_TYPE_CAU);
+		}
+	}
+
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) {
+		src = g->ops.regops.get_hwpm_pma_channel_register_ranges(&num);
+		add_range_to_map(src, num, table, &filled,
+			NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL);
+	}
+
+	nvgpu_log(g, gpu_dbg_prof, "Allowlist map created successfully for handle %u",
+		prof->prof_handle);
+
+	nvgpu_assert(total == filled);
+	/* Sorted by start offset so lookups can binary-search the map. */
+	sort(table, total, sizeof(*table), map_cmp, NULL);
+	for (idx = 0U; idx < total; idx++) {
+		nvgpu_log(g, gpu_dbg_prof, "allowlist[%u]: 0x%x-0x%x : type %u",
+			idx, table[idx].start, table[idx].end, table[idx].type);
+	}
+
+	prof->map = table;
+	prof->map_count = total;
+	return 0;
+}
+
+static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof)
+{
+	nvgpu_log(prof->g, gpu_dbg_prof, "Allowlist map destroy for handle %u", prof->prof_handle);
+	nvgpu_kfree(prof->g, prof->map);	/* release the range map built at bind time */
+	prof->map = NULL;	/* unbind after a failed bind lands here again: never re-free the old map */
+	prof->map_count = 0U;	/* keep the count consistent with the cleared pointer */
+}
+
+/*
+ * Binary-search map, which must be sorted by start offset, for the range that
+ * contains offset. On a hit *type receives the range's type and true is returned.
+ * The window is half-open, [start, end), so the u32 bounds never wrap below zero
+ * when offset lies before the first range or map_count is 0.
+ */
+static bool allowlist_range_search(struct gk20a *g,
+		struct nvgpu_pm_resource_register_range_map *map,
+		u32 map_count, u32 offset,
+		enum nvgpu_pm_resource_hwpm_register_type *type)
+{
+	u32 start = 0U;
+	u32 end = map_count;
+	u32 mid;
+
+	while (start < end) {
+		mid = start + ((end - start) / 2U);
+
+		if (offset < map[mid].start) {
+			end = mid;
+		} else if (offset > map[mid].end) {
+			start = mid + 1U;
+		} else {
+			*type = map[mid].type;
+			nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x found in range 0x%x-0x%x, type: %u",
+				offset, map[mid].start, map[mid].end, map[mid].type);
+			return true;
+		}
+	}
+
+	nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x not found in range search", offset);
+	return false;
+}
+
+/*
+ * Binary-search offset_allowlist (count entries, assumed sorted ascending) for
+ * offset and return true on an exact match. The window is half-open,
+ * [start, end), so the u32 bounds never wrap below zero when offset sorts
+ * before the first entry or count is 0.
+ */
+static bool allowlist_offset_search(struct gk20a *g,
+		const u32 *offset_allowlist, u32 count, u32 offset)
+{
+	u32 start = 0U;
+	u32 end = count;
+	u32 mid;
+
+	while (start < end) {
+		mid = start + ((end - start) / 2U);
+		if (offset_allowlist[mid] == offset) {
+			nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x found in offset allowlist",
+				offset);
+			return true;
+		}
+
+		if (offset < offset_allowlist[mid]) {
+			end = mid;
+		} else {
+			start = mid + 1U;
+		}
+	}
+
+	nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x not found in offset allowlist",
+		offset);
+	return false;
+}
+
+/*
+ * Check offset against prof's range map and then against the offset allowlist
+ * of the matched register type. Perfmux ranges pass on the range match alone.
+ * When type is non-NULL it receives the matched type even if the offset
+ * allowlist check later fails.
+ */
+bool nvgpu_profiler_validate_regops_allowlist(struct nvgpu_profiler_object *prof,
+		u32 offset, enum nvgpu_pm_resource_hwpm_register_type *type)
+{
+	struct gk20a *g = prof->g;
+	enum nvgpu_pm_resource_hwpm_register_type matched;
+	const u32 *allowed;
+	u32 num_allowed;
+	u32 stride;
+
+	if (!allowlist_range_search(g, prof->map, prof->map_count, offset, &matched)) {
+		return false;
+	}
+
+	if (type != NULL) {
+		*type = matched;
+	}
+
+	switch ((u32)matched) {
+	case NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX:
+		/* no per-offset allowlist is consulted for perfmux ranges */
+		return true;
+
+	case NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON:
+		allowed = g->ops.regops.get_hwpm_perfmon_register_offset_allowlist(&num_allowed);
+		stride = g->ops.regops.get_hwpm_perfmon_register_stride();
+		break;
+	case NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER:
+		allowed = g->ops.regops.get_hwpm_router_register_offset_allowlist(&num_allowed);
+		stride = g->ops.regops.get_hwpm_router_register_stride();
+		break;
+	case NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER:
+		allowed = g->ops.regops.get_hwpm_pma_trigger_register_offset_allowlist(&num_allowed);
+		stride = g->ops.regops.get_hwpm_pma_trigger_register_stride();
+		break;
+	case NVGPU_HWPM_REGISTER_TYPE_SMPC:
+		allowed = g->ops.regops.get_smpc_register_offset_allowlist(&num_allowed);
+		stride = g->ops.regops.get_smpc_register_stride();
+		break;
+	case NVGPU_HWPM_REGISTER_TYPE_CAU:
+		allowed = g->ops.regops.get_cau_register_offset_allowlist(&num_allowed);
+		stride = g->ops.regops.get_cau_register_stride();
+		break;
+	case NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL:
+		allowed = g->ops.regops.get_hwpm_pma_channel_register_offset_allowlist(&num_allowed);
+		stride = g->ops.regops.get_hwpm_pma_channel_register_stride();
+		break;
+
+	default:
+		return false;
+	}
+
+	/* Keep only the bits below stride; the mask presumes a power-of-two stride. */
+	offset = offset & (stride - 1U);
+	return allowlist_offset_search(g, allowed, num_allowed, offset);
+}
diff --git a/drivers/gpu/nvgpu/common/regops/regops.c b/drivers/gpu/nvgpu/common/regops/regops.c
index feb4b6067..f71e4f6cd 100644
--- a/drivers/gpu/nvgpu/common/regops/regops.c
+++ b/drivers/gpu/nvgpu/common/regops/regops.c
@@ -30,6 +30,7 @@
 #include <nvgpu/regops.h>
 #include <nvgpu/gr/obj_ctx.h>
 #include <nvgpu/gr/gr_utils.h>
+#include <nvgpu/profiler.h>
 
 /* Access ctx buffer offset functions in gr_gk20a.h */
 #include "hal/gr/gr/gr_gk20a.h"
@@ -78,6 +79,7 @@ static bool gr_context_info_available(struct gk20a *g)
 }
 
 static bool validate_reg_ops(struct gk20a *g,
+			    struct nvgpu_profiler_object *prof,
 			    u32 *ctx_rd_count, u32 *ctx_wr_count,
 			    struct nvgpu_dbg_reg_op *ops,
 			    u32 op_count,
@@ -86,6 +88,7 @@ static bool validate_reg_ops(struct gk20a *g,
 
 int exec_regops_gk20a(struct gk20a *g,
 		      struct nvgpu_tsg *tsg,
+		      struct nvgpu_profiler_object *prof,
 		      struct nvgpu_dbg_reg_op *ops,
 		      u32 num_ops,
 		      u32 *flags)
@@ -99,7 +102,7 @@ int exec_regops_gk20a(struct gk20a *g,
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
 
-	ok = validate_reg_ops(g, &ctx_rd_count, &ctx_wr_count,
+	ok = validate_reg_ops(g, prof, &ctx_rd_count, &ctx_wr_count,
 		ops, num_ops, tsg != NULL, flags);
 	if (!ok) {
 		nvgpu_err(g, "invalid op(s)");
@@ -318,6 +321,36 @@ static bool check_whitelists(struct gk20a *g,
 	return valid;
 }
 
+/* Validate one regop's offset against prof's allowlist, recording the result in op->status. */
+static int profiler_obj_validate_reg_op_offset(struct nvgpu_profiler_object *prof,
+		struct nvgpu_dbg_reg_op *op)
+{
+	struct gk20a *g = prof->g;
+	const u32 offset = op->offset;
+	bool ok;
+
+	op->status = 0;
+
+	/* support only 24-bit 4-byte aligned offsets */
+	if ((offset & 0xFF000003U) != 0U) {
+		nvgpu_err(g, "invalid regop offset: 0x%x", offset);
+		op->status |= REGOP(STATUS_INVALID_OFFSET);
+		return -EINVAL;
+	}
+
+	/* A 64-bit op also covers the next 32-bit word, so check offset + 4 as well. */
+	ok = nvgpu_profiler_validate_regops_allowlist(prof, offset, NULL);
+	if (ok && ((op->op == REGOP(READ_64)) || (op->op == REGOP(WRITE_64)))) {
+		ok = nvgpu_profiler_validate_regops_allowlist(prof, offset + 4U, NULL);
+	}
+	if (!ok) {
+		op->status |= REGOP(STATUS_INVALID_OFFSET);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* note: the op here has already been through validate_reg_op_info */
 static int validate_reg_op_offset(struct gk20a *g,
 				  struct nvgpu_dbg_reg_op *op,
@@ -377,6 +410,7 @@ static int validate_reg_op_offset(struct gk20a *g,
 }
 
 static bool validate_reg_ops(struct gk20a *g,
+			    struct nvgpu_profiler_object *prof,
 			    u32 *ctx_rd_count, u32 *ctx_wr_count,
 			    struct nvgpu_dbg_reg_op *ops,
 			    u32 op_count,
@@ -419,10 +453,19 @@ static bool validate_reg_ops(struct gk20a *g,
 
 		/* if "allow_all" flag enabled, dont validate offset */
 		if (!g->allow_all) {
-			if (validate_reg_op_offset(g, &ops[i], valid_ctx) != 0) {
-				op_failed = true;
-				if (all_or_none) {
-					break;
+			if (prof != NULL) {
+				if (profiler_obj_validate_reg_op_offset(prof, &ops[i]) != 0) {
+					op_failed = true;
+					if (all_or_none) {
+						break;
+					}
+				}
+			} else {
+				if (validate_reg_op_offset(g, &ops[i], valid_ctx) != 0) {
+					op_failed = true;
+					if (all_or_none) {
+						break;
+					}
 				}
 			}
 		}
diff --git a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c
index 2f3d9f754..86c327493 100644
--- a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c
@@ -36,6 +36,7 @@
 
 int vgpu_exec_regops(struct gk20a *g,
 		      struct nvgpu_tsg *tsg,
+		      struct nvgpu_profiler_object *prof,
 		      struct nvgpu_dbg_reg_op *ops,
 		      u32 num_ops,
 		      u32 *flags)
diff --git a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h
index ebe274c8e..bf805e236 100644
--- a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h
+++ b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h
@@ -31,6 +31,7 @@ struct nvgpu_channel;
 
 int vgpu_exec_regops(struct gk20a *g,
 		      struct nvgpu_tsg *tsg,
+		      struct nvgpu_profiler_object *prof,
 		      struct nvgpu_dbg_reg_op *ops,
 		      u32 num_ops,
 		      u32 *flags);
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
index f2e69d146..866c4f68c 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
@@ -1489,6 +1489,15 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
 					break;
 			}
 
+			/*
+			 * Move to next op if current op is invalid.
+			 * Execution will reach here only if CONTINUE_ON_ERROR
+			 * mode is requested.
+			 */
+			if (ctx_ops[i].status != REGOP(STATUS_SUCCESS)) {
+				continue;
+			}
+
 			/* only do ctx ops and only on the right pass */
 			if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
 			    (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index a2caee408..ccf209bc4 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -89,6 +89,7 @@
 #include "hal/ptimer/ptimer_gp10b.h"
 #ifdef CONFIG_NVGPU_DEBUGGER
 #include "hal/regops/regops_gv11b.h"
+#include "hal/regops/allowlist_gv11b.h"
 #endif
 #ifdef CONFIG_NVGPU_RECOVERY
 #include "hal/rc/rc_gv11b.h"
@@ -1169,6 +1170,29 @@ static const struct gops_regops gv11b_ops_regops = {
 	.get_context_whitelist_ranges_count = gv11b_get_context_whitelist_ranges_count,
 	.get_runcontrol_whitelist = gv11b_get_runcontrol_whitelist,
 	.get_runcontrol_whitelist_count = gv11b_get_runcontrol_whitelist_count,
+	.get_hwpm_perfmon_register_stride = gv11b_get_hwpm_perfmon_register_stride,
+	.get_hwpm_router_register_stride = gv11b_get_hwpm_router_register_stride,
+	.get_hwpm_pma_channel_register_stride = gv11b_get_hwpm_pma_channel_register_stride,
+	.get_hwpm_pma_trigger_register_stride = gv11b_get_hwpm_pma_trigger_register_stride,
+	.get_smpc_register_stride = gv11b_get_smpc_register_stride,
+	.get_cau_register_stride = NULL,
+	.get_hwpm_perfmon_register_offset_allowlist =
+		gv11b_get_hwpm_perfmon_register_offset_allowlist,
+	.get_hwpm_router_register_offset_allowlist =
+		gv11b_get_hwpm_router_register_offset_allowlist,
+	.get_hwpm_pma_channel_register_offset_allowlist =
+		gv11b_get_hwpm_pma_channel_register_offset_allowlist,
+	.get_hwpm_pma_trigger_register_offset_allowlist =
+		gv11b_get_hwpm_pma_trigger_register_offset_allowlist,
+	.get_smpc_register_offset_allowlist = gv11b_get_smpc_register_offset_allowlist,
+	.get_cau_register_offset_allowlist = NULL,
+	.get_hwpm_perfmon_register_ranges = gv11b_get_hwpm_perfmon_register_ranges,
+	.get_hwpm_router_register_ranges = gv11b_get_hwpm_router_register_ranges,
+	.get_hwpm_pma_channel_register_ranges = gv11b_get_hwpm_pma_channel_register_ranges,
+	.get_hwpm_pma_trigger_register_ranges = gv11b_get_hwpm_pma_trigger_register_ranges,
+	.get_smpc_register_ranges = gv11b_get_smpc_register_ranges,
+	.get_cau_register_ranges = NULL,
+	.get_hwpm_perfmux_register_ranges = gv11b_get_hwpm_perfmux_register_ranges,
 };
 #endif
 
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index 952d82c93..cc9e5b532 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -74,6 +74,7 @@
 #include "hal/ptimer/ptimer_gk20a.h"
 #include "hal/ptimer/ptimer_gp10b.h"
 #include "hal/regops/regops_tu104.h"
+#include "hal/regops/allowlist_tu104.h"
 #include "hal/func/func_tu104.h"
 #include "hal/fuse/fuse_gm20b.h"
 #include "hal/fuse/fuse_gp10b.h"
@@ -1232,6 +1233,29 @@ static const struct gops_regops tu104_ops_regops = {
 	.get_context_whitelist_ranges_count = tu104_get_context_whitelist_ranges_count,
 	.get_runcontrol_whitelist = tu104_get_runcontrol_whitelist,
 	.get_runcontrol_whitelist_count = tu104_get_runcontrol_whitelist_count,
+	.get_hwpm_perfmon_register_stride = tu104_get_hwpm_perfmon_register_stride,
+	.get_hwpm_router_register_stride = tu104_get_hwpm_router_register_stride,
+	.get_hwpm_pma_channel_register_stride = tu104_get_hwpm_pma_channel_register_stride,
+	.get_hwpm_pma_trigger_register_stride = tu104_get_hwpm_pma_trigger_register_stride,
+	.get_smpc_register_stride = tu104_get_smpc_register_stride,
+	.get_cau_register_stride = tu104_get_cau_register_stride,
+	.get_hwpm_perfmon_register_offset_allowlist =
+		tu104_get_hwpm_perfmon_register_offset_allowlist,
+	.get_hwpm_router_register_offset_allowlist =
+		tu104_get_hwpm_router_register_offset_allowlist,
+	.get_hwpm_pma_channel_register_offset_allowlist =
+		tu104_get_hwpm_pma_channel_register_offset_allowlist,
+	.get_hwpm_pma_trigger_register_offset_allowlist =
+		tu104_get_hwpm_pma_trigger_register_offset_allowlist,
+	.get_smpc_register_offset_allowlist = tu104_get_smpc_register_offset_allowlist,
+	.get_cau_register_offset_allowlist = tu104_get_cau_register_offset_allowlist,
+	.get_hwpm_perfmon_register_ranges = tu104_get_hwpm_perfmon_register_ranges,
+	.get_hwpm_router_register_ranges = tu104_get_hwpm_router_register_ranges,
+	.get_hwpm_pma_channel_register_ranges = tu104_get_hwpm_pma_channel_register_ranges,
+	.get_hwpm_pma_trigger_register_ranges = tu104_get_hwpm_pma_trigger_register_ranges,
+	.get_smpc_register_ranges = tu104_get_smpc_register_ranges,
+	.get_cau_register_ranges = tu104_get_cau_register_ranges,
+	.get_hwpm_perfmux_register_ranges = tu104_get_hwpm_perfmux_register_ranges,
 };
 #endif
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h b/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h
index b4854eaf7..4e93c63bf 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h
@@ -26,6 +26,7 @@
 struct gops_regops {
 	int (*exec_regops)(struct gk20a *g,
 				struct nvgpu_tsg *tsg,
+				struct nvgpu_profiler_object *prof,
 				struct nvgpu_dbg_reg_op *ops,
 				u32 num_ops,
 				u32 *flags);
@@ -37,6 +38,32 @@ struct gops_regops {
 	u64 (*get_context_whitelist_ranges_count)(void);
 	const u32* (*get_runcontrol_whitelist)(void);
 	u64 (*get_runcontrol_whitelist_count)(void);
+	u32 (*get_hwpm_perfmon_register_stride)(void);
+	u32 (*get_hwpm_router_register_stride)(void);
+	u32 (*get_hwpm_pma_channel_register_stride)(void);
+	u32 (*get_hwpm_pma_trigger_register_stride)(void);
+	u32 (*get_smpc_register_stride)(void);
+	u32 (*get_cau_register_stride)(void);
+	const u32 *(*get_hwpm_perfmon_register_offset_allowlist)(u32 *count);
+	const u32 *(*get_hwpm_router_register_offset_allowlist)(u32 *count);
+	const u32 *(*get_hwpm_pma_channel_register_offset_allowlist)(u32 *count);
+	const u32 *(*get_hwpm_pma_trigger_register_offset_allowlist)(u32 *count);
+	const u32 *(*get_smpc_register_offset_allowlist)(u32 *count);
+	const u32 *(*get_cau_register_offset_allowlist)(u32 *count);
+	const struct nvgpu_pm_resource_register_range *
+		(*get_hwpm_perfmon_register_ranges)(u32 *count);
+	const struct nvgpu_pm_resource_register_range *
+		(*get_hwpm_router_register_ranges)(u32 *count);
+	const struct nvgpu_pm_resource_register_range *
+		(*get_hwpm_pma_channel_register_ranges)(u32 *count);
+	const struct nvgpu_pm_resource_register_range *
+		(*get_hwpm_pma_trigger_register_ranges)(u32 *count);
+	const struct nvgpu_pm_resource_register_range *
+		(*get_smpc_register_ranges)(u32 *count);
+	const struct nvgpu_pm_resource_register_range *
+		(*get_cau_register_ranges)(u32 *count);
+	const struct nvgpu_pm_resource_register_range *
+		(*get_hwpm_perfmux_register_ranges)(u32 *count);
 };
 struct gops_debugger {
 	void (*post_events)(struct nvgpu_channel *ch);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/profiler.h b/drivers/gpu/nvgpu/include/nvgpu/profiler.h
index 5588ee375..38dec66ee 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/profiler.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/profiler.h
@@ -32,6 +32,8 @@
 struct gk20a;
 struct nvgpu_channel;
 struct nvgpu_tsg;
+struct nvgpu_pm_resource_register_range_map;
+enum nvgpu_pm_resource_hwpm_register_type;
 
 struct nvgpu_profiler_object {
 	struct gk20a *g;
@@ -106,6 +108,15 @@ struct nvgpu_profiler_object {
 	 * (if PMA stream resource is reserved successfully).
 	 */
 	void *pma_bytes_available_buffer_cpuva;
+
+	/*
+	 * Dynamic map of HWPM register ranges that can be accessed
+	 * through regops.
+	 */
+	struct nvgpu_pm_resource_register_range_map *map;
+
+	/* Number of range entries in map above */
+	u32 map_count;
 };
 
 static inline struct nvgpu_profiler_object *
@@ -135,5 +146,8 @@ int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof);
 int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof);
 void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof);
 
+bool nvgpu_profiler_validate_regops_allowlist(struct nvgpu_profiler_object *prof,
+		u32 offset, enum nvgpu_pm_resource_hwpm_register_type *type);
+
 #endif /* CONFIG_NVGPU_PROFILER */
 #endif /* NVGPU_PROFILER_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/regops.h b/drivers/gpu/nvgpu/include/nvgpu/regops.h
index d60162fed..bd0df7b7d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/regops.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/regops.h
@@ -30,6 +30,7 @@
 
 struct gk20a;
 struct nvgpu_tsg;
+struct nvgpu_profiler_object;
 
 /*
  * Register operations
@@ -88,6 +89,7 @@ struct regop_offset_range {
 
 int exec_regops_gk20a(struct gk20a *g,
 		      struct nvgpu_tsg *tsg,
+		      struct nvgpu_profiler_object *prof,
 		      struct nvgpu_dbg_reg_op *ops,
 		      u32 num_ops,
 		      u32 *flags);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/regops_allowlist.h b/drivers/gpu/nvgpu/include/nvgpu/regops_allowlist.h
index f1def23ac..888148cc3 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/regops_allowlist.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/regops_allowlist.h
@@ -30,4 +30,20 @@ struct nvgpu_pm_resource_register_range {
 	u32 end;
 };
 
+enum nvgpu_pm_resource_hwpm_register_type {	/* register type tag of an allowlist range */
+	NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON,		/* mapped with the HWPM_LEGACY resource */
+	NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER,		/* mapped with the HWPM_LEGACY resource */
+	NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER,	/* mapped with the HWPM_LEGACY resource */
+	NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX,		/* HWPM_LEGACY; validated by range match only */
+	NVGPU_HWPM_REGISTER_TYPE_SMPC,			/* mapped with the SMPC resource */
+	NVGPU_HWPM_REGISTER_TYPE_CAU,			/* HWPM_LEGACY; only when the CAU ranges HAL is set */
+	NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL,	/* mapped with the PMA_STREAM resource */
+};
+
+struct nvgpu_pm_resource_register_range_map {	/* one typed entry of a profiler's range map */
+	u32 start;	/* first offset of the range; the range search treats it as inclusive */
+	u32 end;	/* last offset of the range; the range search treats it as inclusive */
+	enum nvgpu_pm_resource_hwpm_register_type type;	/* selects the offset allowlist and stride */
+};
+
 #endif /* NVGPU_REGOPS_ALLOWLIST_H */
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index e66f09138..b7b0969a2 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -875,7 +875,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 			if (err)
 				break;
 
-			err = g->ops.regops.exec_regops(g, tsg,
+			err = g->ops.regops.exec_regops(g, tsg, NULL,
 				g->dbg_regops_tmp_buf, num_ops, &flags);
 
 			if (err) {
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
index fe284a531..742ef1230 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
@@ -620,7 +620,7 @@ static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv
 			flags &= ~NVGPU_REG_OP_FLAG_ALL_PASSED;
 		}
 
-		err = g->ops.regops.exec_regops(g, tsg,
+		err = g->ops.regops.exec_regops(g, tsg, prof,
 			priv->regops_staging_buf, num_ops,
 			&flags);
 		if (err) {