From 869735cda475c050a561e447ebbed59096d57f11 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Thu, 5 Nov 2020 17:46:44 +0530 Subject: [PATCH] gpu: nvgpu: add dynamic allowlist support Add gv11b and tu104 HALs to get allowed HWPM resource register ranges, offsets, and stride metadata. Add new enum nvgpu_pm_resource_hwpm_register_type for HWPM register type. Add new struct nvgpu_pm_resource_register_range_map to store all the register ranges for HWPM resources. Add a pointer to the map in struct nvgpu_profiler_object along with the map entry count. Add new API nvgpu_profiler_build_regops_allowlist() to build the regops allowlist dynamically while binding the resources. The map entry count is obtained with get_pm_resource_register_range_map_entry_count() and only those resource ranges are added for which the resource is reserved by the profiler object. Add nvgpu_profiler_destroy_regops_allowlist() to destroy the allowlist while unbinding the resources. Add static function allowlist_range_search() to search for a register offset in the HWPM resource ranges. Add another static function allowlist_offset_search() to search for the offset in the per-resource offset list. Add nvgpu_profiler_validate_regops_allowlist() that accepts an offset value, checks if it is in the allowed ranges using allowlist_range_search() and then checks if the offset is in the allowlist using allowlist_offset_search(). Update gops.regops.exec_regops() to receive the profiler object pointer as a parameter. Invoke nvgpu_profiler_validate_regops_allowlist() from validate_reg_ops() if the prof pointer is non-NULL. This will be true only for the new profiler stack and not for legacy profilers. In gr_exec_ctx_ops(), skip regops execution if the offset is invalid. 
Bug 2510974 Jira NVGPU-5360 Change-Id: I40acb91cc37508629c83106ea15b062250bba473 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2460001 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-cert Reviewed-by: svc-mobile-misra Reviewed-by: Antony Clince Alex Reviewed-by: mobile promotions Tested-by: mobile promotions GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/common/profiler/profiler.c | 304 ++++++++++++++++++ drivers/gpu/nvgpu/common/regops/regops.c | 53 ++- drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c | 1 + drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h | 1 + drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c | 9 + drivers/gpu/nvgpu/hal/init/hal_gv11b.c | 24 ++ drivers/gpu/nvgpu/hal/init/hal_tu104.c | 24 ++ .../gpu/nvgpu/include/nvgpu/gops/debugger.h | 27 ++ drivers/gpu/nvgpu/include/nvgpu/profiler.h | 14 + drivers/gpu/nvgpu/include/nvgpu/regops.h | 2 + .../nvgpu/include/nvgpu/regops_allowlist.h | 16 + drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 2 +- drivers/gpu/nvgpu/os/linux/ioctl_prof.c | 2 +- 13 files changed, 472 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c index 43b7a5592..7a34e07e7 100644 --- a/drivers/gpu/nvgpu/common/profiler/profiler.c +++ b/drivers/gpu/nvgpu/common/profiler/profiler.c @@ -32,6 +32,11 @@ #include #include #include +#include +#include + +static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof); +static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof); static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); static int generate_unique_id(void) @@ -576,8 +581,19 @@ int nvgpu_profiler_bind_pm_resources(struct nvgpu_profiler_object *prof) "SMPC bound with profiler handle %u", prof->prof_handle); } + err = nvgpu_profiler_build_regops_allowlist(prof); + if (err != 0) { + nvgpu_err(g, "failed to build allowlist"); + goto fail_unbind; + } + prof->bound = true; + 
gk20a_idle(g); + return 0; + +fail_unbind: + nvgpu_profiler_unbind_pm_resources(prof); fail: gk20a_idle(g); return err; @@ -594,6 +610,8 @@ int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof) return -EINVAL; } + nvgpu_profiler_destroy_regops_allowlist(prof); + err = gk20a_busy(g); if (err) { nvgpu_err(g, "failed to poweron"); @@ -681,3 +699,289 @@ void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof) NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); } } + +static int map_cmp(const void *a, const void *b) +{ + const struct nvgpu_pm_resource_register_range_map *e1; + const struct nvgpu_pm_resource_register_range_map *e2; + + e1 = (const struct nvgpu_pm_resource_register_range_map *)a; + e2 = (const struct nvgpu_pm_resource_register_range_map *)b; + + if (e1->start < e2->start) { + return -1; + } + + if (e1->start > e2->start) { + return 1; + } + + return 0; +} + +static u32 get_pm_resource_register_range_map_entry_count(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + u32 count = 0U; + u32 range_count; + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) { + g->ops.regops.get_smpc_register_ranges(&range_count); + count += range_count; + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) { + g->ops.regops.get_hwpm_perfmon_register_ranges(&range_count); + count += range_count; + + g->ops.regops.get_hwpm_router_register_ranges(&range_count); + count += range_count; + + g->ops.regops.get_hwpm_pma_trigger_register_ranges(&range_count); + count += range_count; + + g->ops.regops.get_hwpm_perfmux_register_ranges(&range_count); + count += range_count; + + if (g->ops.regops.get_cau_register_ranges != NULL) { + g->ops.regops.get_cau_register_ranges(&range_count); + count += range_count; + } + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) { + g->ops.regops.get_hwpm_pma_channel_register_ranges(&range_count); + count += range_count; + } + + return count; +} + +static void 
add_range_to_map(const struct nvgpu_pm_resource_register_range *range, + u32 range_count, struct nvgpu_pm_resource_register_range_map *map, + u32 *map_index, enum nvgpu_pm_resource_hwpm_register_type type) +{ + u32 index = *map_index; + u32 i; + + for (i = 0U; i < range_count; i++) { + map[index].start = range[i].start; + map[index].end = range[i].end; + map[index].type = type; + index++; + } + + *map_index = index; +} + +static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof) +{ + struct nvgpu_pm_resource_register_range_map *map; + const struct nvgpu_pm_resource_register_range *range; + u32 map_count, map_index = 0U; + u32 range_count; + struct gk20a *g = prof->g; + u32 i; + + map_count = get_pm_resource_register_range_map_entry_count(prof); + if (map_count == 0U) { + return -EINVAL; + } + + nvgpu_log(g, gpu_dbg_prof, "Allowlist map number of entries %u for handle %u", + map_count, prof->prof_handle); + + map = nvgpu_kzalloc(g, sizeof(*map) * map_count); + if (map == NULL) { + return -ENOMEM; + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) { + range = g->ops.regops.get_smpc_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_SMPC); + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) { + range = g->ops.regops.get_hwpm_perfmon_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON); + + range = g->ops.regops.get_hwpm_router_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER); + + range = g->ops.regops.get_hwpm_pma_trigger_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER); + + range = g->ops.regops.get_hwpm_perfmux_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + 
NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX); + + if (g->ops.regops.get_cau_register_ranges != NULL) { + range = g->ops.regops.get_cau_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_CAU); + } + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) { + range = g->ops.regops.get_hwpm_pma_channel_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL); + } + + nvgpu_log(g, gpu_dbg_prof, "Allowlist map created successfully for handle %u", + prof->prof_handle); + + nvgpu_assert(map_count == map_index); + + sort(map, map_count, sizeof(*map), map_cmp, NULL); + + for (i = 0; i < map_count; i++) { + nvgpu_log(g, gpu_dbg_prof, "allowlist[%u]: 0x%x-0x%x : type %u", + i, map[i].start, map[i].end, map[i].type); + } + + prof->map = map; + prof->map_count = map_count; + return 0; +} + +static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof) +{ + nvgpu_log(prof->g, gpu_dbg_prof, "Allowlist map destroy for handle %u", + prof->prof_handle); + + nvgpu_kfree(prof->g, prof->map); +} + +static bool allowlist_range_search(struct gk20a *g, + struct nvgpu_pm_resource_register_range_map *map, + u32 map_count, u32 offset, + enum nvgpu_pm_resource_hwpm_register_type *type) +{ + u32 start = 0U; + u32 mid = 0U; + u32 end = map_count - 1U; + bool found = false; + + while (start <= end) { + mid = (start + end) / 2U; + + if (offset < map[mid].start) { + end = mid - 1U; + } else if (offset > map[mid].end) { + start = mid + 1U; + } else { + found = true; + break; + } + } + + if (found) { + *type = map[mid].type; + nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x found in range 0x%x-0x%x, type: %u", + offset, map[mid].start, map[mid].end, map[mid].type); + } else { + nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x not found in range search", offset); + } + + return found; +} + +static bool allowlist_offset_search(struct gk20a *g, + 
const u32 *offset_allowlist, u32 count, u32 offset) +{ + u32 start = 0U; + u32 mid = 0U; + u32 end = count - 1U; + bool found = false; + + while (start <= end) { + mid = (start + end) / 2U; + if (offset_allowlist[mid] == offset) { + found = true; + break; + } + + if (offset < offset_allowlist[mid]) { + end = mid - 1U; + } else { + start = mid + 1U; + } + } + + if (found) { + nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x found in offset allowlist", + offset); + } else { + nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x not found in offset allowlist", + offset); + } + + return found; +} + +bool nvgpu_profiler_validate_regops_allowlist(struct nvgpu_profiler_object *prof, + u32 offset, enum nvgpu_pm_resource_hwpm_register_type *type) +{ + enum nvgpu_pm_resource_hwpm_register_type reg_type; + struct gk20a *g = prof->g; + const u32 *offset_allowlist; + u32 count; + u32 stride; + bool found; + + found = allowlist_range_search(g, prof->map, prof->map_count, offset, ®_type); + if (!found) { + return found; + } + + if (type != NULL) { + *type = reg_type; + } + + if (reg_type == NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX) { + return found; + } + + switch ((u32)reg_type) { + case NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON: + offset_allowlist = g->ops.regops.get_hwpm_perfmon_register_offset_allowlist(&count); + stride = g->ops.regops.get_hwpm_perfmon_register_stride(); + break; + + case NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER: + offset_allowlist = g->ops.regops.get_hwpm_router_register_offset_allowlist(&count); + stride = g->ops.regops.get_hwpm_router_register_stride(); + break; + + case NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER: + offset_allowlist = g->ops.regops.get_hwpm_pma_trigger_register_offset_allowlist(&count); + stride = g->ops.regops.get_hwpm_pma_trigger_register_stride(); + break; + + case NVGPU_HWPM_REGISTER_TYPE_SMPC: + offset_allowlist = g->ops.regops.get_smpc_register_offset_allowlist(&count); + stride = g->ops.regops.get_smpc_register_stride(); + break; + + case 
NVGPU_HWPM_REGISTER_TYPE_CAU: + offset_allowlist = g->ops.regops.get_cau_register_offset_allowlist(&count); + stride = g->ops.regops.get_cau_register_stride(); + break; + + case NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL: + offset_allowlist = g->ops.regops.get_hwpm_pma_channel_register_offset_allowlist(&count); + stride = g->ops.regops.get_hwpm_pma_channel_register_stride(); + break; + + default: + return false; + } + + offset = offset & (stride - 1U); + return allowlist_offset_search(g, offset_allowlist, count, offset); +} diff --git a/drivers/gpu/nvgpu/common/regops/regops.c b/drivers/gpu/nvgpu/common/regops/regops.c index feb4b6067..f71e4f6cd 100644 --- a/drivers/gpu/nvgpu/common/regops/regops.c +++ b/drivers/gpu/nvgpu/common/regops/regops.c @@ -30,6 +30,7 @@ #include #include #include +#include /* Access ctx buffer offset functions in gr_gk20a.h */ #include "hal/gr/gr/gr_gk20a.h" @@ -78,6 +79,7 @@ static bool gr_context_info_available(struct gk20a *g) } static bool validate_reg_ops(struct gk20a *g, + struct nvgpu_profiler_object *prof, u32 *ctx_rd_count, u32 *ctx_wr_count, struct nvgpu_dbg_reg_op *ops, u32 op_count, @@ -86,6 +88,7 @@ static bool validate_reg_ops(struct gk20a *g, int exec_regops_gk20a(struct gk20a *g, struct nvgpu_tsg *tsg, + struct nvgpu_profiler_object *prof, struct nvgpu_dbg_reg_op *ops, u32 num_ops, u32 *flags) @@ -99,7 +102,7 @@ int exec_regops_gk20a(struct gk20a *g, nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - ok = validate_reg_ops(g, &ctx_rd_count, &ctx_wr_count, + ok = validate_reg_ops(g, prof, &ctx_rd_count, &ctx_wr_count, ops, num_ops, tsg != NULL, flags); if (!ok) { nvgpu_err(g, "invalid op(s)"); @@ -318,6 +321,36 @@ static bool check_whitelists(struct gk20a *g, return valid; } +static int profiler_obj_validate_reg_op_offset(struct nvgpu_profiler_object *prof, + struct nvgpu_dbg_reg_op *op) +{ + struct gk20a *g = prof->g; + bool valid = false; + u32 offset; + + op->status = 0; + offset = op->offset; + + /* support only 24-bit 
4-byte aligned offsets */ + if ((offset & 0xFF000003U) != 0U) { + nvgpu_err(g, "invalid regop offset: 0x%x", offset); + op->status |= REGOP(STATUS_INVALID_OFFSET); + return -EINVAL; + } + + valid = nvgpu_profiler_validate_regops_allowlist(prof, offset, NULL); + if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) { + valid = nvgpu_profiler_validate_regops_allowlist(prof, offset + 4U, NULL); + } + + if (!valid) { + op->status |= REGOP(STATUS_INVALID_OFFSET); + return -EINVAL; + } + + return 0; +} + /* note: the op here has already been through validate_reg_op_info */ static int validate_reg_op_offset(struct gk20a *g, struct nvgpu_dbg_reg_op *op, @@ -377,6 +410,7 @@ static int validate_reg_op_offset(struct gk20a *g, } static bool validate_reg_ops(struct gk20a *g, + struct nvgpu_profiler_object *prof, u32 *ctx_rd_count, u32 *ctx_wr_count, struct nvgpu_dbg_reg_op *ops, u32 op_count, @@ -419,10 +453,19 @@ static bool validate_reg_ops(struct gk20a *g, /* if "allow_all" flag enabled, dont validate offset */ if (!g->allow_all) { - if (validate_reg_op_offset(g, &ops[i], valid_ctx) != 0) { - op_failed = true; - if (all_or_none) { - break; + if (prof != NULL) { + if (profiler_obj_validate_reg_op_offset(prof, &ops[i]) != 0) { + op_failed = true; + if (all_or_none) { + break; + } + } + } else { + if (validate_reg_op_offset(g, &ops[i], valid_ctx) != 0) { + op_failed = true; + if (all_or_none) { + break; + } } } } diff --git a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c index 2f3d9f754..86c327493 100644 --- a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c @@ -36,6 +36,7 @@ int vgpu_exec_regops(struct gk20a *g, struct nvgpu_tsg *tsg, + struct nvgpu_profiler_object *prof, struct nvgpu_dbg_reg_op *ops, u32 num_ops, u32 *flags) diff --git a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h index ebe274c8e..bf805e236 100644 
--- a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h @@ -31,6 +31,7 @@ struct nvgpu_channel; int vgpu_exec_regops(struct gk20a *g, struct nvgpu_tsg *tsg, + struct nvgpu_profiler_object *prof, struct nvgpu_dbg_reg_op *ops, u32 num_ops, u32 *flags); diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c index f2e69d146..866c4f68c 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c @@ -1489,6 +1489,15 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, break; } + /* + * Move to next op if current op is invalid. + * Execution will reach here only if CONTINUE_ON_ERROR + * mode is requested. + */ + if (ctx_ops[i].status != REGOP(STATUS_SUCCESS)) { + continue; + } + /* only do ctx ops and only on the right pass */ if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) || (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) || diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index a2caee408..ccf209bc4 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -89,6 +89,7 @@ #include "hal/ptimer/ptimer_gp10b.h" #ifdef CONFIG_NVGPU_DEBUGGER #include "hal/regops/regops_gv11b.h" +#include "hal/regops/allowlist_gv11b.h" #endif #ifdef CONFIG_NVGPU_RECOVERY #include "hal/rc/rc_gv11b.h" @@ -1169,6 +1170,29 @@ static const struct gops_regops gv11b_ops_regops = { .get_context_whitelist_ranges_count = gv11b_get_context_whitelist_ranges_count, .get_runcontrol_whitelist = gv11b_get_runcontrol_whitelist, .get_runcontrol_whitelist_count = gv11b_get_runcontrol_whitelist_count, + .get_hwpm_perfmon_register_stride = gv11b_get_hwpm_perfmon_register_stride, + .get_hwpm_router_register_stride = gv11b_get_hwpm_router_register_stride, + .get_hwpm_pma_channel_register_stride = gv11b_get_hwpm_pma_channel_register_stride, + .get_hwpm_pma_trigger_register_stride = 
gv11b_get_hwpm_pma_trigger_register_stride, + .get_smpc_register_stride = gv11b_get_smpc_register_stride, + .get_cau_register_stride = NULL, + .get_hwpm_perfmon_register_offset_allowlist = + gv11b_get_hwpm_perfmon_register_offset_allowlist, + .get_hwpm_router_register_offset_allowlist = + gv11b_get_hwpm_router_register_offset_allowlist, + .get_hwpm_pma_channel_register_offset_allowlist = + gv11b_get_hwpm_pma_channel_register_offset_allowlist, + .get_hwpm_pma_trigger_register_offset_allowlist = + gv11b_get_hwpm_pma_trigger_register_offset_allowlist, + .get_smpc_register_offset_allowlist = gv11b_get_smpc_register_offset_allowlist, + .get_cau_register_offset_allowlist = NULL, + .get_hwpm_perfmon_register_ranges = gv11b_get_hwpm_perfmon_register_ranges, + .get_hwpm_router_register_ranges = gv11b_get_hwpm_router_register_ranges, + .get_hwpm_pma_channel_register_ranges = gv11b_get_hwpm_pma_channel_register_ranges, + .get_hwpm_pma_trigger_register_ranges = gv11b_get_hwpm_pma_trigger_register_ranges, + .get_smpc_register_ranges = gv11b_get_smpc_register_ranges, + .get_cau_register_ranges = NULL, + .get_hwpm_perfmux_register_ranges = gv11b_get_hwpm_perfmux_register_ranges, }; #endif diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 952d82c93..cc9e5b532 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -74,6 +74,7 @@ #include "hal/ptimer/ptimer_gk20a.h" #include "hal/ptimer/ptimer_gp10b.h" #include "hal/regops/regops_tu104.h" +#include "hal/regops/allowlist_tu104.h" #include "hal/func/func_tu104.h" #include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gp10b.h" @@ -1232,6 +1233,29 @@ static const struct gops_regops tu104_ops_regops = { .get_context_whitelist_ranges_count = tu104_get_context_whitelist_ranges_count, .get_runcontrol_whitelist = tu104_get_runcontrol_whitelist, .get_runcontrol_whitelist_count = tu104_get_runcontrol_whitelist_count, + 
.get_hwpm_perfmon_register_stride = tu104_get_hwpm_perfmon_register_stride, + .get_hwpm_router_register_stride = tu104_get_hwpm_router_register_stride, + .get_hwpm_pma_channel_register_stride = tu104_get_hwpm_pma_channel_register_stride, + .get_hwpm_pma_trigger_register_stride = tu104_get_hwpm_pma_trigger_register_stride, + .get_smpc_register_stride = tu104_get_smpc_register_stride, + .get_cau_register_stride = tu104_get_cau_register_stride, + .get_hwpm_perfmon_register_offset_allowlist = + tu104_get_hwpm_perfmon_register_offset_allowlist, + .get_hwpm_router_register_offset_allowlist = + tu104_get_hwpm_router_register_offset_allowlist, + .get_hwpm_pma_channel_register_offset_allowlist = + tu104_get_hwpm_pma_channel_register_offset_allowlist, + .get_hwpm_pma_trigger_register_offset_allowlist = + tu104_get_hwpm_pma_trigger_register_offset_allowlist, + .get_smpc_register_offset_allowlist = tu104_get_smpc_register_offset_allowlist, + .get_cau_register_offset_allowlist = tu104_get_cau_register_offset_allowlist, + .get_hwpm_perfmon_register_ranges = tu104_get_hwpm_perfmon_register_ranges, + .get_hwpm_router_register_ranges = tu104_get_hwpm_router_register_ranges, + .get_hwpm_pma_channel_register_ranges = tu104_get_hwpm_pma_channel_register_ranges, + .get_hwpm_pma_trigger_register_ranges = tu104_get_hwpm_pma_trigger_register_ranges, + .get_smpc_register_ranges = tu104_get_smpc_register_ranges, + .get_cau_register_ranges = tu104_get_cau_register_ranges, + .get_hwpm_perfmux_register_ranges = tu104_get_hwpm_perfmux_register_ranges, }; #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h b/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h index b4854eaf7..4e93c63bf 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/debugger.h @@ -26,6 +26,7 @@ struct gops_regops { int (*exec_regops)(struct gk20a *g, struct nvgpu_tsg *tsg, + struct nvgpu_profiler_object *prof, struct nvgpu_dbg_reg_op *ops, u32 num_ops, u32 
*flags); @@ -37,6 +38,32 @@ struct gops_regops { u64 (*get_context_whitelist_ranges_count)(void); const u32* (*get_runcontrol_whitelist)(void); u64 (*get_runcontrol_whitelist_count)(void); + u32 (*get_hwpm_perfmon_register_stride)(void); + u32 (*get_hwpm_router_register_stride)(void); + u32 (*get_hwpm_pma_channel_register_stride)(void); + u32 (*get_hwpm_pma_trigger_register_stride)(void); + u32 (*get_smpc_register_stride)(void); + u32 (*get_cau_register_stride)(void); + const u32 *(*get_hwpm_perfmon_register_offset_allowlist)(u32 *count); + const u32 *(*get_hwpm_router_register_offset_allowlist)(u32 *count); + const u32 *(*get_hwpm_pma_channel_register_offset_allowlist)(u32 *count); + const u32 *(*get_hwpm_pma_trigger_register_offset_allowlist)(u32 *count); + const u32 *(*get_smpc_register_offset_allowlist)(u32 *count); + const u32 *(*get_cau_register_offset_allowlist)(u32 *count); + const struct nvgpu_pm_resource_register_range * + (*get_hwpm_perfmon_register_ranges)(u32 *count); + const struct nvgpu_pm_resource_register_range * + (*get_hwpm_router_register_ranges)(u32 *count); + const struct nvgpu_pm_resource_register_range * + (*get_hwpm_pma_channel_register_ranges)(u32 *count); + const struct nvgpu_pm_resource_register_range * + (*get_hwpm_pma_trigger_register_ranges)(u32 *count); + const struct nvgpu_pm_resource_register_range * + (*get_smpc_register_ranges)(u32 *count); + const struct nvgpu_pm_resource_register_range * + (*get_cau_register_ranges)(u32 *count); + const struct nvgpu_pm_resource_register_range * + (*get_hwpm_perfmux_register_ranges)(u32 *count); }; struct gops_debugger { void (*post_events)(struct nvgpu_channel *ch); diff --git a/drivers/gpu/nvgpu/include/nvgpu/profiler.h b/drivers/gpu/nvgpu/include/nvgpu/profiler.h index 5588ee375..38dec66ee 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/profiler.h +++ b/drivers/gpu/nvgpu/include/nvgpu/profiler.h @@ -32,6 +32,8 @@ struct gk20a; struct nvgpu_channel; struct nvgpu_tsg; +struct 
nvgpu_pm_resource_register_range_map; +enum nvgpu_pm_resource_hwpm_register_type; struct nvgpu_profiler_object { struct gk20a *g; @@ -106,6 +108,15 @@ struct nvgpu_profiler_object { * (if PMA stream resource is reserved successfully). */ void *pma_bytes_available_buffer_cpuva; + + /* + * Dynamic map of HWPM register ranges that can be accessed + * through regops. + */ + struct nvgpu_pm_resource_register_range_map *map; + + /* Number of range entries in map above */ + u32 map_count; }; static inline struct nvgpu_profiler_object * @@ -135,5 +146,8 @@ int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof); int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof); void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof); +bool nvgpu_profiler_validate_regops_allowlist(struct nvgpu_profiler_object *prof, + u32 offset, enum nvgpu_pm_resource_hwpm_register_type *type); + #endif /* CONFIG_NVGPU_PROFILER */ #endif /* NVGPU_PROFILER_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/regops.h b/drivers/gpu/nvgpu/include/nvgpu/regops.h index d60162fed..bd0df7b7d 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/regops.h +++ b/drivers/gpu/nvgpu/include/nvgpu/regops.h @@ -30,6 +30,7 @@ struct gk20a; struct nvgpu_tsg; +struct nvgpu_profiler_object; /* * Register operations @@ -88,6 +89,7 @@ struct regop_offset_range { int exec_regops_gk20a(struct gk20a *g, struct nvgpu_tsg *tsg, + struct nvgpu_profiler_object *prof, struct nvgpu_dbg_reg_op *ops, u32 num_ops, u32 *flags); diff --git a/drivers/gpu/nvgpu/include/nvgpu/regops_allowlist.h b/drivers/gpu/nvgpu/include/nvgpu/regops_allowlist.h index f1def23ac..888148cc3 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/regops_allowlist.h +++ b/drivers/gpu/nvgpu/include/nvgpu/regops_allowlist.h @@ -30,4 +30,20 @@ struct nvgpu_pm_resource_register_range { u32 end; }; +enum nvgpu_pm_resource_hwpm_register_type { + NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON, + NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER, + 
NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER, + NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX, + NVGPU_HWPM_REGISTER_TYPE_SMPC, + NVGPU_HWPM_REGISTER_TYPE_CAU, + NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL, +}; + +struct nvgpu_pm_resource_register_range_map { + u32 start; + u32 end; + enum nvgpu_pm_resource_hwpm_register_type type; +}; + #endif /* NVGPU_REGOPS_ALLOWLIST_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index e66f09138..b7b0969a2 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ -875,7 +875,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, if (err) break; - err = g->ops.regops.exec_regops(g, tsg, + err = g->ops.regops.exec_regops(g, tsg, NULL, g->dbg_regops_tmp_buf, num_ops, &flags); if (err) { diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c index fe284a531..742ef1230 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c @@ -620,7 +620,7 @@ static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv flags &= ~NVGPU_REG_OP_FLAG_ALL_PASSED; } - err = g->ops.regops.exec_regops(g, tsg, + err = g->ops.regops.exec_regops(g, tsg, prof, priv->regops_staging_buf, num_ops, &flags); if (err) {