gpu: nvgpu: add dynamic allowlist support

Add gv11b and tu104 HALs to get the allowed HWPM resource register ranges,
offsets, and stride metadata.

Add a new enum, nvgpu_pm_resource_hwpm_register_type, for the HWPM register
type. Add a new struct, nvgpu_pm_resource_register_range_map, to store all
the register ranges for HWPM resources. Add a pointer to this map in struct
nvgpu_profiler_object, along with the map entry count.

Add a new API, nvgpu_profiler_build_regops_allowlist(), to build the regops
allowlist dynamically while binding the resources. The map entry count is
obtained from get_pm_resource_register_range_map_entry_count(), and only the
ranges of resources actually reserved by the profiler object are added.

Add nvgpu_profiler_destroy_regops_allowlist() to destroy the allowlist
while unbinding the resources.

Add a static function, allowlist_range_search(), to search for a register
offset in the HWPM resource ranges, and another static function,
allowlist_offset_search(), to search for the offset in the per-resource
offset list.

Add nvgpu_profiler_validate_regops_allowlist(), which accepts an offset
value, checks whether it lies within an allowed range using
allowlist_range_search(), and then checks whether the offset is present in
the allowlist using allowlist_offset_search().
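
For illustration, the validation is a two-stage lookup: a binary search over
the merged range map classifies the offset by resource type, and the offset,
reduced by the unit stride, is then looked up in that resource's offset
allowlist. A minimal standalone sketch (all names and values below are
hypothetical, with a linear scan in place of the binary search for brevity):

    #include <stdbool.h>
    #include <stdint.h>

    struct demo_range { uint32_t start, end; };

    /* hypothetical PERFMON data; real tables come from the per-chip HALs */
    static const struct demo_range perfmon_range = { 0x00240000U, 0x00243fffU };
    static const uint32_t perfmon_stride = 0x200U;  /* must be a power of two */
    static const uint32_t perfmon_offsets[] = { 0x00cU, 0x020U, 0x080U };

    static bool demo_validate(uint32_t offset)
    {
        uint32_t i;

        /* stage 1: offset must fall inside a reserved resource range */
        if (offset < perfmon_range.start || offset > perfmon_range.end)
            return false;

        /* stage 2: mask down to one unit instance, then match the
         * stride-relative offset against the per-resource allowlist */
        offset &= perfmon_stride - 1U;
        for (i = 0U; i < sizeof(perfmon_offsets) / sizeof(perfmon_offsets[0]); i++) {
            if (perfmon_offsets[i] == offset)
                return true;
        }
        return false;
    }

For example, offset 0x0024040c falls in the range above and masks down to
0x00c, which is present in the offset list, so it would be allowed.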

Update gops.regops.exec_regops() to take a profiler object pointer as a
parameter.

Invoke nvgpu_profiler_validate_regops_allowlist() from
validate_reg_ops() if the prof pointer is non-null. This is true only
for the new profiler stack, not for legacy profilers.

In gr_exec_ctx_ops(), skip regops execution if the offset is invalid.

Bug 2510974
Jira NVGPU-5360

Change-Id: I40acb91cc37508629c83106ea15b062250bba473
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2460001
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Author: Deepak Nibade, 2020-11-05 17:46:44 +05:30
Committed by: mobile promotions
Commit: 869735cda4, parent: 14a8b86d43
13 changed files with 472 additions and 7 deletions

@@ -32,6 +32,11 @@
#include <nvgpu/gr/ctx.h>
#include <nvgpu/perfbuf.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/regops_allowlist.h>
#include <nvgpu/sort.h>
static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof);
static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof);
static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
static int generate_unique_id(void)
@@ -576,8 +581,19 @@ int nvgpu_profiler_bind_pm_resources(struct nvgpu_profiler_object *prof)
"SMPC bound with profiler handle %u", prof->prof_handle);
}
err = nvgpu_profiler_build_regops_allowlist(prof);
if (err != 0) {
nvgpu_err(g, "failed to build allowlist");
goto fail_unbind;
}
prof->bound = true;
gk20a_idle(g);
return 0;
fail_unbind:
nvgpu_profiler_unbind_pm_resources(prof);
fail:
gk20a_idle(g);
return err;
@@ -594,6 +610,8 @@ int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof)
return -EINVAL;
}
nvgpu_profiler_destroy_regops_allowlist(prof);
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to poweron");
@@ -681,3 +699,289 @@ void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof)
NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
}
}
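/* sort() comparator: orders allowlist map entries by ascending range start. */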
static int map_cmp(const void *a, const void *b)
{
const struct nvgpu_pm_resource_register_range_map *e1;
const struct nvgpu_pm_resource_register_range_map *e2;
e1 = (const struct nvgpu_pm_resource_register_range_map *)a;
e2 = (const struct nvgpu_pm_resource_register_range_map *)b;
if (e1->start < e2->start) {
return -1;
}
if (e1->start > e2->start) {
return 1;
}
return 0;
}
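/* Count the range entries needed in the allowlist map, based on which PM resources this profiler object has reserved. */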
static u32 get_pm_resource_register_range_map_entry_count(struct nvgpu_profiler_object *prof)
{
struct gk20a *g = prof->g;
u32 count = 0U;
u32 range_count;
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) {
g->ops.regops.get_smpc_register_ranges(&range_count);
count += range_count;
}
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
g->ops.regops.get_hwpm_perfmon_register_ranges(&range_count);
count += range_count;
g->ops.regops.get_hwpm_router_register_ranges(&range_count);
count += range_count;
g->ops.regops.get_hwpm_pma_trigger_register_ranges(&range_count);
count += range_count;
g->ops.regops.get_hwpm_perfmux_register_ranges(&range_count);
count += range_count;
if (g->ops.regops.get_cau_register_ranges != NULL) {
g->ops.regops.get_cau_register_ranges(&range_count);
count += range_count;
}
}
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) {
g->ops.regops.get_hwpm_pma_channel_register_ranges(&range_count);
count += range_count;
}
return count;
}
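/* Append one HAL range table to the map, tagging every entry with its HWPM resource type. */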
static void add_range_to_map(const struct nvgpu_pm_resource_register_range *range,
u32 range_count, struct nvgpu_pm_resource_register_range_map *map,
u32 *map_index, enum nvgpu_pm_resource_hwpm_register_type type)
{
u32 index = *map_index;
u32 i;
for (i = 0U; i < range_count; i++) {
map[index].start = range[i].start;
map[index].end = range[i].end;
map[index].type = type;
index++;
}
*map_index = index;
}
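/* Build the per-object allowlist: count the entries, allocate the map, fill it from the HAL range tables of each reserved resource, then sort it by range start so it can be binary searched. */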
static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof)
{
struct nvgpu_pm_resource_register_range_map *map;
const struct nvgpu_pm_resource_register_range *range;
u32 map_count, map_index = 0U;
u32 range_count;
struct gk20a *g = prof->g;
u32 i;
map_count = get_pm_resource_register_range_map_entry_count(prof);
if (map_count == 0U) {
return -EINVAL;
}
nvgpu_log(g, gpu_dbg_prof, "Allowlist map number of entries %u for handle %u",
map_count, prof->prof_handle);
map = nvgpu_kzalloc(g, sizeof(*map) * map_count);
if (map == NULL) {
return -ENOMEM;
}
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) {
range = g->ops.regops.get_smpc_register_ranges(&range_count);
add_range_to_map(range, range_count, map, &map_index,
NVGPU_HWPM_REGISTER_TYPE_SMPC);
}
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
range = g->ops.regops.get_hwpm_perfmon_register_ranges(&range_count);
add_range_to_map(range, range_count, map, &map_index,
NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON);
range = g->ops.regops.get_hwpm_router_register_ranges(&range_count);
add_range_to_map(range, range_count, map, &map_index,
NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER);
range = g->ops.regops.get_hwpm_pma_trigger_register_ranges(&range_count);
add_range_to_map(range, range_count, map, &map_index,
NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER);
range = g->ops.regops.get_hwpm_perfmux_register_ranges(&range_count);
add_range_to_map(range, range_count, map, &map_index,
NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX);
if (g->ops.regops.get_cau_register_ranges != NULL) {
range = g->ops.regops.get_cau_register_ranges(&range_count);
add_range_to_map(range, range_count, map, &map_index,
NVGPU_HWPM_REGISTER_TYPE_CAU);
}
}
if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) {
range = g->ops.regops.get_hwpm_pma_channel_register_ranges(&range_count);
add_range_to_map(range, range_count, map, &map_index,
NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL);
}
nvgpu_log(g, gpu_dbg_prof, "Allowlist map created successfully for handle %u",
prof->prof_handle);
nvgpu_assert(map_count == map_index);
sort(map, map_count, sizeof(*map), map_cmp, NULL);
for (i = 0; i < map_count; i++) {
nvgpu_log(g, gpu_dbg_prof, "allowlist[%u]: 0x%x-0x%x : type %u",
i, map[i].start, map[i].end, map[i].type);
}
prof->map = map;
prof->map_count = map_count;
return 0;
}
static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof)
{
nvgpu_log(prof->g, gpu_dbg_prof, "Allowlist map destroy for handle %u",
prof->prof_handle);
nvgpu_kfree(prof->g, prof->map);
}
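/* Binary search over the range map (sorted at bind time); assumes the ranges are non-overlapping. Returns the matching entry's resource type through *type. */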
static bool allowlist_range_search(struct gk20a *g,
struct nvgpu_pm_resource_register_range_map *map,
u32 map_count, u32 offset,
enum nvgpu_pm_resource_hwpm_register_type *type)
{
u32 start = 0U;
u32 mid = 0U;
u32 end = map_count - 1U;
bool found = false;
while (start <= end) {
mid = (start + end) / 2U;
if (offset < map[mid].start) {
/* guard against u32 underflow when the offset lies below map[0] */
if (mid == 0U) {
break;
}
end = mid - 1U;
} else if (offset > map[mid].end) {
start = mid + 1U;
} else {
found = true;
break;
}
}
if (found) {
*type = map[mid].type;
nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x found in range 0x%x-0x%x, type: %u",
offset, map[mid].start, map[mid].end, map[mid].type);
} else {
nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x not found in range search", offset);
}
return found;
}
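/* Binary search in a sorted per-resource offset allowlist; the offset passed in is already reduced to a stride-relative value by the caller. */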
static bool allowlist_offset_search(struct gk20a *g,
const u32 *offset_allowlist, u32 count, u32 offset)
{
u32 start = 0U;
u32 mid = 0U;
u32 end = count - 1U;
bool found = false;
while (start <= end) {
mid = (start + end) / 2U;
if (offset_allowlist[mid] == offset) {
found = true;
break;
}
if (offset < offset_allowlist[mid]) {
/* guard against u32 underflow when the offset is below the first entry */
if (mid == 0U) {
break;
}
end = mid - 1U;
} else {
start = mid + 1U;
}
}
if (found) {
nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x found in offset allowlist",
offset);
} else {
nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x not found in offset allowlist",
offset);
}
return found;
}
bool nvgpu_profiler_validate_regops_allowlist(struct nvgpu_profiler_object *prof,
u32 offset, enum nvgpu_pm_resource_hwpm_register_type *type)
{
enum nvgpu_pm_resource_hwpm_register_type reg_type;
struct gk20a *g = prof->g;
const u32 *offset_allowlist;
u32 count;
u32 stride;
bool found;
found = allowlist_range_search(g, prof->map, prof->map_count, offset, &reg_type);
if (!found) {
return found;
}
if (type != NULL) {
*type = reg_type;
}
if (reg_type == NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX) {
return found;
}
switch ((u32)reg_type) {
case NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON:
offset_allowlist = g->ops.regops.get_hwpm_perfmon_register_offset_allowlist(&count);
stride = g->ops.regops.get_hwpm_perfmon_register_stride();
break;
case NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER:
offset_allowlist = g->ops.regops.get_hwpm_router_register_offset_allowlist(&count);
stride = g->ops.regops.get_hwpm_router_register_stride();
break;
case NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER:
offset_allowlist = g->ops.regops.get_hwpm_pma_trigger_register_offset_allowlist(&count);
stride = g->ops.regops.get_hwpm_pma_trigger_register_stride();
break;
case NVGPU_HWPM_REGISTER_TYPE_SMPC:
offset_allowlist = g->ops.regops.get_smpc_register_offset_allowlist(&count);
stride = g->ops.regops.get_smpc_register_stride();
break;
case NVGPU_HWPM_REGISTER_TYPE_CAU:
offset_allowlist = g->ops.regops.get_cau_register_offset_allowlist(&count);
stride = g->ops.regops.get_cau_register_stride();
break;
case NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL:
offset_allowlist = g->ops.regops.get_hwpm_pma_channel_register_offset_allowlist(&count);
stride = g->ops.regops.get_hwpm_pma_channel_register_stride();
break;
default:
return false;
}
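/* Strides are assumed to be powers of two here, so this mask reduces the absolute offset to an offset within a single unit instance. */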
offset = offset & (stride - 1U);
return allowlist_offset_search(g, offset_allowlist, count, offset);
}

@@ -30,6 +30,7 @@
#include <nvgpu/regops.h>
#include <nvgpu/gr/obj_ctx.h>
#include <nvgpu/gr/gr_utils.h>
#include <nvgpu/profiler.h>
/* Access ctx buffer offset functions in gr_gk20a.h */
#include "hal/gr/gr/gr_gk20a.h"
@@ -78,6 +79,7 @@ static bool gr_context_info_available(struct gk20a *g)
}
static bool validate_reg_ops(struct gk20a *g,
struct nvgpu_profiler_object *prof,
u32 *ctx_rd_count, u32 *ctx_wr_count,
struct nvgpu_dbg_reg_op *ops,
u32 op_count,
@@ -86,6 +88,7 @@ static bool validate_reg_ops(struct gk20a *g,
int exec_regops_gk20a(struct gk20a *g,
struct nvgpu_tsg *tsg,
struct nvgpu_profiler_object *prof,
struct nvgpu_dbg_reg_op *ops,
u32 num_ops,
u32 *flags)
@@ -99,7 +102,7 @@ int exec_regops_gk20a(struct gk20a *g,
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
- ok = validate_reg_ops(g, &ctx_rd_count, &ctx_wr_count,
+ ok = validate_reg_ops(g, prof, &ctx_rd_count, &ctx_wr_count,
ops, num_ops, tsg != NULL, flags);
if (!ok) {
nvgpu_err(g, "invalid op(s)");
@@ -318,6 +321,36 @@ static bool check_whitelists(struct gk20a *g,
return valid;
}
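/* Offset validation used for the new profiler stack: enforce 24-bit, 4-byte-aligned offsets, then consult the dynamically built allowlist. */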
static int profiler_obj_validate_reg_op_offset(struct nvgpu_profiler_object *prof,
struct nvgpu_dbg_reg_op *op)
{
struct gk20a *g = prof->g;
bool valid = false;
u32 offset;
op->status = 0;
offset = op->offset;
/* support only 24-bit 4-byte aligned offsets */
if ((offset & 0xFF000003U) != 0U) {
nvgpu_err(g, "invalid regop offset: 0x%x", offset);
op->status |= REGOP(STATUS_INVALID_OFFSET);
return -EINVAL;
}
valid = nvgpu_profiler_validate_regops_allowlist(prof, offset, NULL);
if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) {
valid = nvgpu_profiler_validate_regops_allowlist(prof, offset + 4U, NULL);
}
if (!valid) {
op->status |= REGOP(STATUS_INVALID_OFFSET);
return -EINVAL;
}
return 0;
}
/* note: the op here has already been through validate_reg_op_info */
static int validate_reg_op_offset(struct gk20a *g,
struct nvgpu_dbg_reg_op *op,
@@ -377,6 +410,7 @@ static int validate_reg_op_offset(struct gk20a *g,
}
static bool validate_reg_ops(struct gk20a *g,
struct nvgpu_profiler_object *prof,
u32 *ctx_rd_count, u32 *ctx_wr_count,
struct nvgpu_dbg_reg_op *ops,
u32 op_count,
@@ -419,10 +453,19 @@ static bool validate_reg_ops(struct gk20a *g,
/* if "allow_all" flag enabled, dont validate offset */
if (!g->allow_all) {
- if (validate_reg_op_offset(g, &ops[i], valid_ctx) != 0) {
- op_failed = true;
- if (all_or_none) {
- break;
+ if (prof != NULL) {
+ if (profiler_obj_validate_reg_op_offset(prof, &ops[i]) != 0) {
+ op_failed = true;
+ if (all_or_none) {
+ break;
+ }
+ }
+ } else {
+ if (validate_reg_op_offset(g, &ops[i], valid_ctx) != 0) {
+ op_failed = true;
+ if (all_or_none) {
+ break;
+ }
+ }
+ }
}

@@ -36,6 +36,7 @@
int vgpu_exec_regops(struct gk20a *g,
struct nvgpu_tsg *tsg,
struct nvgpu_profiler_object *prof,
struct nvgpu_dbg_reg_op *ops,
u32 num_ops,
u32 *flags)

@@ -31,6 +31,7 @@ struct nvgpu_channel;
int vgpu_exec_regops(struct gk20a *g,
struct nvgpu_tsg *tsg,
struct nvgpu_profiler_object *prof,
struct nvgpu_dbg_reg_op *ops,
u32 num_ops,
u32 *flags);

@@ -1489,6 +1489,15 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
break;
}
/*
* Move to next op if current op is invalid.
* Execution will reach here only if CONTINUE_ON_ERROR
* mode is requested.
*/
if (ctx_ops[i].status != REGOP(STATUS_SUCCESS)) {
continue;
}
/* only do ctx ops and only on the right pass */
if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
(((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||

@@ -89,6 +89,7 @@
#include "hal/ptimer/ptimer_gp10b.h"
#ifdef CONFIG_NVGPU_DEBUGGER
#include "hal/regops/regops_gv11b.h"
#include "hal/regops/allowlist_gv11b.h"
#endif
#ifdef CONFIG_NVGPU_RECOVERY
#include "hal/rc/rc_gv11b.h"
@@ -1169,6 +1170,29 @@ static const struct gops_regops gv11b_ops_regops = {
.get_context_whitelist_ranges_count = gv11b_get_context_whitelist_ranges_count,
.get_runcontrol_whitelist = gv11b_get_runcontrol_whitelist,
.get_runcontrol_whitelist_count = gv11b_get_runcontrol_whitelist_count,
.get_hwpm_perfmon_register_stride = gv11b_get_hwpm_perfmon_register_stride,
.get_hwpm_router_register_stride = gv11b_get_hwpm_router_register_stride,
.get_hwpm_pma_channel_register_stride = gv11b_get_hwpm_pma_channel_register_stride,
.get_hwpm_pma_trigger_register_stride = gv11b_get_hwpm_pma_trigger_register_stride,
.get_smpc_register_stride = gv11b_get_smpc_register_stride,
.get_cau_register_stride = NULL,
.get_hwpm_perfmon_register_offset_allowlist =
gv11b_get_hwpm_perfmon_register_offset_allowlist,
.get_hwpm_router_register_offset_allowlist =
gv11b_get_hwpm_router_register_offset_allowlist,
.get_hwpm_pma_channel_register_offset_allowlist =
gv11b_get_hwpm_pma_channel_register_offset_allowlist,
.get_hwpm_pma_trigger_register_offset_allowlist =
gv11b_get_hwpm_pma_trigger_register_offset_allowlist,
.get_smpc_register_offset_allowlist = gv11b_get_smpc_register_offset_allowlist,
.get_cau_register_offset_allowlist = NULL,
.get_hwpm_perfmon_register_ranges = gv11b_get_hwpm_perfmon_register_ranges,
.get_hwpm_router_register_ranges = gv11b_get_hwpm_router_register_ranges,
.get_hwpm_pma_channel_register_ranges = gv11b_get_hwpm_pma_channel_register_ranges,
.get_hwpm_pma_trigger_register_ranges = gv11b_get_hwpm_pma_trigger_register_ranges,
.get_smpc_register_ranges = gv11b_get_smpc_register_ranges,
.get_cau_register_ranges = NULL,
.get_hwpm_perfmux_register_ranges = gv11b_get_hwpm_perfmux_register_ranges,
};
#endif

@@ -74,6 +74,7 @@
#include "hal/ptimer/ptimer_gk20a.h"
#include "hal/ptimer/ptimer_gp10b.h"
#include "hal/regops/regops_tu104.h"
#include "hal/regops/allowlist_tu104.h"
#include "hal/func/func_tu104.h"
#include "hal/fuse/fuse_gm20b.h"
#include "hal/fuse/fuse_gp10b.h"
@@ -1232,6 +1233,29 @@ static const struct gops_regops tu104_ops_regops = {
.get_context_whitelist_ranges_count = tu104_get_context_whitelist_ranges_count,
.get_runcontrol_whitelist = tu104_get_runcontrol_whitelist,
.get_runcontrol_whitelist_count = tu104_get_runcontrol_whitelist_count,
.get_hwpm_perfmon_register_stride = tu104_get_hwpm_perfmon_register_stride,
.get_hwpm_router_register_stride = tu104_get_hwpm_router_register_stride,
.get_hwpm_pma_channel_register_stride = tu104_get_hwpm_pma_channel_register_stride,
.get_hwpm_pma_trigger_register_stride = tu104_get_hwpm_pma_trigger_register_stride,
.get_smpc_register_stride = tu104_get_smpc_register_stride,
.get_cau_register_stride = tu104_get_cau_register_stride,
.get_hwpm_perfmon_register_offset_allowlist =
tu104_get_hwpm_perfmon_register_offset_allowlist,
.get_hwpm_router_register_offset_allowlist =
tu104_get_hwpm_router_register_offset_allowlist,
.get_hwpm_pma_channel_register_offset_allowlist =
tu104_get_hwpm_pma_channel_register_offset_allowlist,
.get_hwpm_pma_trigger_register_offset_allowlist =
tu104_get_hwpm_pma_trigger_register_offset_allowlist,
.get_smpc_register_offset_allowlist = tu104_get_smpc_register_offset_allowlist,
.get_cau_register_offset_allowlist = tu104_get_cau_register_offset_allowlist,
.get_hwpm_perfmon_register_ranges = tu104_get_hwpm_perfmon_register_ranges,
.get_hwpm_router_register_ranges = tu104_get_hwpm_router_register_ranges,
.get_hwpm_pma_channel_register_ranges = tu104_get_hwpm_pma_channel_register_ranges,
.get_hwpm_pma_trigger_register_ranges = tu104_get_hwpm_pma_trigger_register_ranges,
.get_smpc_register_ranges = tu104_get_smpc_register_ranges,
.get_cau_register_ranges = tu104_get_cau_register_ranges,
.get_hwpm_perfmux_register_ranges = tu104_get_hwpm_perfmux_register_ranges,
};
#endif

@@ -26,6 +26,7 @@
struct gops_regops {
int (*exec_regops)(struct gk20a *g,
struct nvgpu_tsg *tsg,
struct nvgpu_profiler_object *prof,
struct nvgpu_dbg_reg_op *ops,
u32 num_ops,
u32 *flags);
@@ -37,6 +38,32 @@ struct gops_regops {
u64 (*get_context_whitelist_ranges_count)(void);
const u32* (*get_runcontrol_whitelist)(void);
u64 (*get_runcontrol_whitelist_count)(void);
u32 (*get_hwpm_perfmon_register_stride)(void);
u32 (*get_hwpm_router_register_stride)(void);
u32 (*get_hwpm_pma_channel_register_stride)(void);
u32 (*get_hwpm_pma_trigger_register_stride)(void);
u32 (*get_smpc_register_stride)(void);
u32 (*get_cau_register_stride)(void);
const u32 *(*get_hwpm_perfmon_register_offset_allowlist)(u32 *count);
const u32 *(*get_hwpm_router_register_offset_allowlist)(u32 *count);
const u32 *(*get_hwpm_pma_channel_register_offset_allowlist)(u32 *count);
const u32 *(*get_hwpm_pma_trigger_register_offset_allowlist)(u32 *count);
const u32 *(*get_smpc_register_offset_allowlist)(u32 *count);
const u32 *(*get_cau_register_offset_allowlist)(u32 *count);
const struct nvgpu_pm_resource_register_range *
(*get_hwpm_perfmon_register_ranges)(u32 *count);
const struct nvgpu_pm_resource_register_range *
(*get_hwpm_router_register_ranges)(u32 *count);
const struct nvgpu_pm_resource_register_range *
(*get_hwpm_pma_channel_register_ranges)(u32 *count);
const struct nvgpu_pm_resource_register_range *
(*get_hwpm_pma_trigger_register_ranges)(u32 *count);
const struct nvgpu_pm_resource_register_range *
(*get_smpc_register_ranges)(u32 *count);
const struct nvgpu_pm_resource_register_range *
(*get_cau_register_ranges)(u32 *count);
const struct nvgpu_pm_resource_register_range *
(*get_hwpm_perfmux_register_ranges)(u32 *count);
};
struct gops_debugger {
void (*post_events)(struct nvgpu_channel *ch);

@@ -32,6 +32,8 @@
struct gk20a;
struct nvgpu_channel;
struct nvgpu_tsg;
struct nvgpu_pm_resource_register_range_map;
enum nvgpu_pm_resource_hwpm_register_type;
struct nvgpu_profiler_object {
struct gk20a *g;
@@ -106,6 +108,15 @@ struct nvgpu_profiler_object {
* (if PMA stream resource is reserved successfully).
*/
void *pma_bytes_available_buffer_cpuva;
/*
* Dynamic map of HWPM register ranges that can be accessed
* through regops.
*/
struct nvgpu_pm_resource_register_range_map *map;
/* Number of range entries in map above */
u32 map_count;
};
static inline struct nvgpu_profiler_object *
@@ -135,5 +146,8 @@ int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof);
int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof);
void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof);
bool nvgpu_profiler_validate_regops_allowlist(struct nvgpu_profiler_object *prof,
u32 offset, enum nvgpu_pm_resource_hwpm_register_type *type);
#endif /* CONFIG_NVGPU_PROFILER */
#endif /* NVGPU_PROFILER_H */

@@ -30,6 +30,7 @@
struct gk20a;
struct nvgpu_tsg;
struct nvgpu_profiler_object;
/*
* Register operations
@@ -88,6 +89,7 @@ struct regop_offset_range {
int exec_regops_gk20a(struct gk20a *g,
struct nvgpu_tsg *tsg,
struct nvgpu_profiler_object *prof,
struct nvgpu_dbg_reg_op *ops,
u32 num_ops,
u32 *flags);

@@ -30,4 +30,20 @@ struct nvgpu_pm_resource_register_range {
u32 end;
};
enum nvgpu_pm_resource_hwpm_register_type {
NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON,
NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER,
NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER,
NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX,
NVGPU_HWPM_REGISTER_TYPE_SMPC,
NVGPU_HWPM_REGISTER_TYPE_CAU,
NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL,
};
struct nvgpu_pm_resource_register_range_map {
u32 start;
u32 end;
enum nvgpu_pm_resource_hwpm_register_type type;
};
#endif /* NVGPU_REGOPS_ALLOWLIST_H */

@@ -875,7 +875,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
if (err)
break;
- err = g->ops.regops.exec_regops(g, tsg,
+ err = g->ops.regops.exec_regops(g, tsg, NULL,
g->dbg_regops_tmp_buf, num_ops, &flags);
if (err) {

@@ -620,7 +620,7 @@ static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv
flags &= ~NVGPU_REG_OP_FLAG_ALL_PASSED;
}
- err = g->ops.regops.exec_regops(g, tsg,
+ err = g->ops.regops.exec_regops(g, tsg, prof,
priv->regops_staging_buf, num_ops,
&flags);
if (err) {