gpu: nvgpu: add dynamic allowlist support
Add gv11b and tu104 HALs to get allowed HWPM resource register ranges, offsets, and stride metadata.

Add new enum nvgpu_pm_resource_hwpm_register_type for HWPM register type. Add new struct nvgpu_pm_resource_register_range_map to store all the register ranges for HWPM resources. Add a pointer to this map in struct nvgpu_profiler_object along with the map entry count.

Add new API nvgpu_profiler_build_regops_allowlist() to build the regops allowlist dynamically while binding the resources. The map entry count is received with get_pm_resource_register_range_map_entry_count(), and only those register ranges are added whose resource is reserved by the profiler object. Add nvgpu_profiler_destroy_regops_allowlist() to destroy the allowlist while unbinding the resources.

Add static function allowlist_range_search() to search a register offset in the HWPM resource ranges, and static function allowlist_offset_search() to search the offset in the per-resource offset list. Add nvgpu_profiler_validate_regops_allowlist(), which accepts an offset value, checks if it is in the allowed ranges using allowlist_range_search(), and then checks if the offset is in the allowlist using allowlist_offset_search().

Update gops.regops.exec_regops() to receive the profiler object pointer as a parameter. Invoke nvgpu_profiler_validate_regops_allowlist() from validate_reg_ops() if the prof pointer is non-NULL; this is true only for the new profiler stack, not for legacy profilers. In gr_exec_ctx_ops(), skip regops execution if the offset is invalid.

Bug 2510974
Jira NVGPU-5360

Change-Id: I40acb91cc37508629c83106ea15b062250bba473
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2460001
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
parent 14a8b86d43
commit 869735cda4
@@ -32,6 +32,11 @@
#include <nvgpu/gr/ctx.h>
#include <nvgpu/perfbuf.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/regops_allowlist.h>
#include <nvgpu/sort.h>

static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof);
static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof);

static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
static int generate_unique_id(void)
@@ -576,8 +581,19 @@ int nvgpu_profiler_bind_pm_resources(struct nvgpu_profiler_object *prof)
			"SMPC bound with profiler handle %u", prof->prof_handle);
	}

	err = nvgpu_profiler_build_regops_allowlist(prof);
	if (err != 0) {
		nvgpu_err(g, "failed to build allowlist");
		goto fail_unbind;
	}

	prof->bound = true;

	gk20a_idle(g);
	return 0;

fail_unbind:
	nvgpu_profiler_unbind_pm_resources(prof);
fail:
	gk20a_idle(g);
	return err;
@@ -594,6 +610,8 @@ int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof)
		return -EINVAL;
	}

	nvgpu_profiler_destroy_regops_allowlist(prof);

	err = gk20a_busy(g);
	if (err) {
		nvgpu_err(g, "failed to poweron");
@@ -681,3 +699,289 @@ void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof)
				NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
	}
}

static int map_cmp(const void *a, const void *b)
{
	const struct nvgpu_pm_resource_register_range_map *e1;
	const struct nvgpu_pm_resource_register_range_map *e2;

	e1 = (const struct nvgpu_pm_resource_register_range_map *)a;
	e2 = (const struct nvgpu_pm_resource_register_range_map *)b;

	if (e1->start < e2->start) {
		return -1;
	}

	if (e1->start > e2->start) {
		return 1;
	}

	return 0;
}
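For reference, a minimal standalone model of this comparator-and-sort step (plain C, with qsort() standing in for nvgpu's sort(), and made-up ranges): sorting by range start is what makes the binary search in allowlist_range_search() below valid.

    #include <stdio.h>
    #include <stdlib.h>

    /* Simplified stand-in for struct nvgpu_pm_resource_register_range_map. */
    struct entry {
        unsigned int start;
        unsigned int end;
    };

    static int cmp(const void *a, const void *b)
    {
        const struct entry *e1 = a;
        const struct entry *e2 = b;

        /* Same ordering rule as map_cmp(): ascending range start. */
        if (e1->start < e2->start)
            return -1;
        if (e1->start > e2->start)
            return 1;
        return 0;
    }

    int main(void)
    {
        struct entry map[] = {
            { 0x240000u, 0x240fffu },
            { 0x100000u, 0x1003ffu },
        };

        qsort(map, sizeof(map) / sizeof(map[0]), sizeof(map[0]), cmp);
        printf("first range now starts at 0x%x\n", map[0].start); /* 0x100000 */
        return 0;
    }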
static u32 get_pm_resource_register_range_map_entry_count(struct nvgpu_profiler_object *prof)
{
	struct gk20a *g = prof->g;
	u32 count = 0U;
	u32 range_count;

	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) {
		g->ops.regops.get_smpc_register_ranges(&range_count);
		count += range_count;
	}

	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
		g->ops.regops.get_hwpm_perfmon_register_ranges(&range_count);
		count += range_count;

		g->ops.regops.get_hwpm_router_register_ranges(&range_count);
		count += range_count;

		g->ops.regops.get_hwpm_pma_trigger_register_ranges(&range_count);
		count += range_count;

		g->ops.regops.get_hwpm_perfmux_register_ranges(&range_count);
		count += range_count;

		if (g->ops.regops.get_cau_register_ranges != NULL) {
			g->ops.regops.get_cau_register_ranges(&range_count);
			count += range_count;
		}
	}

	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) {
		g->ops.regops.get_hwpm_pma_channel_register_ranges(&range_count);
		count += range_count;
	}

	return count;
}
static void add_range_to_map(const struct nvgpu_pm_resource_register_range *range,
		u32 range_count, struct nvgpu_pm_resource_register_range_map *map,
		u32 *map_index, enum nvgpu_pm_resource_hwpm_register_type type)
{
	u32 index = *map_index;
	u32 i;

	for (i = 0U; i < range_count; i++) {
		map[index].start = range[i].start;
		map[index].end = range[i].end;
		map[index].type = type;
		index++;
	}

	*map_index = index;
}
static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof)
{
	struct nvgpu_pm_resource_register_range_map *map;
	const struct nvgpu_pm_resource_register_range *range;
	u32 map_count, map_index = 0U;
	u32 range_count;
	struct gk20a *g = prof->g;
	u32 i;

	map_count = get_pm_resource_register_range_map_entry_count(prof);
	if (map_count == 0U) {
		return -EINVAL;
	}

	nvgpu_log(g, gpu_dbg_prof, "Allowlist map number of entries %u for handle %u",
		map_count, prof->prof_handle);

	map = nvgpu_kzalloc(g, sizeof(*map) * map_count);
	if (map == NULL) {
		return -ENOMEM;
	}

	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) {
		range = g->ops.regops.get_smpc_register_ranges(&range_count);
		add_range_to_map(range, range_count, map, &map_index,
			NVGPU_HWPM_REGISTER_TYPE_SMPC);
	}

	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
		range = g->ops.regops.get_hwpm_perfmon_register_ranges(&range_count);
		add_range_to_map(range, range_count, map, &map_index,
			NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON);

		range = g->ops.regops.get_hwpm_router_register_ranges(&range_count);
		add_range_to_map(range, range_count, map, &map_index,
			NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER);

		range = g->ops.regops.get_hwpm_pma_trigger_register_ranges(&range_count);
		add_range_to_map(range, range_count, map, &map_index,
			NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER);

		range = g->ops.regops.get_hwpm_perfmux_register_ranges(&range_count);
		add_range_to_map(range, range_count, map, &map_index,
			NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX);

		if (g->ops.regops.get_cau_register_ranges != NULL) {
			range = g->ops.regops.get_cau_register_ranges(&range_count);
			add_range_to_map(range, range_count, map, &map_index,
				NVGPU_HWPM_REGISTER_TYPE_CAU);
		}
	}

	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) {
		range = g->ops.regops.get_hwpm_pma_channel_register_ranges(&range_count);
		add_range_to_map(range, range_count, map, &map_index,
			NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL);
	}

	nvgpu_log(g, gpu_dbg_prof, "Allowlist map created successfully for handle %u",
		prof->prof_handle);

	nvgpu_assert(map_count == map_index);

	sort(map, map_count, sizeof(*map), map_cmp, NULL);

	for (i = 0; i < map_count; i++) {
		nvgpu_log(g, gpu_dbg_prof, "allowlist[%u]: 0x%x-0x%x : type %u",
			i, map[i].start, map[i].end, map[i].type);
	}

	prof->map = map;
	prof->map_count = map_count;
	return 0;
}
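To make the result concrete: for a profiler object that reserved, say, only SMPC and the PMA stream, the debug log would show a sorted map along these lines (offsets purely illustrative, not real gv11b/tu104 values):

    allowlist[0]: 0x140000-0x1401ff : type 4
    allowlist[1]: 0x1a0000-0x1a00ff : type 6

where type 4 is NVGPU_HWPM_REGISTER_TYPE_SMPC and type 6 is NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL, per the enum ordering introduced in regops_allowlist.h below.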
static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof)
{
	nvgpu_log(prof->g, gpu_dbg_prof, "Allowlist map destroy for handle %u",
		prof->prof_handle);

	nvgpu_kfree(prof->g, prof->map);
}
static bool allowlist_range_search(struct gk20a *g,
		struct nvgpu_pm_resource_register_range_map *map,
		u32 map_count, u32 offset,
		enum nvgpu_pm_resource_hwpm_register_type *type)
{
	u32 start = 0U;
	u32 mid = 0U;
	u32 end = map_count - 1U;
	bool found = false;

	while (start <= end) {
		mid = (start + end) / 2U;

		if (offset < map[mid].start) {
			end = mid - 1U;
		} else if (offset > map[mid].end) {
			start = mid + 1U;
		} else {
			found = true;
			break;
		}
	}

	if (found) {
		*type = map[mid].type;
		nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x found in range 0x%x-0x%x, type: %u",
			offset, map[mid].start, map[mid].end, map[mid].type);
	} else {
		nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x not found in range search", offset);
	}

	return found;
}
static bool allowlist_offset_search(struct gk20a *g,
		const u32 *offset_allowlist, u32 count, u32 offset)
{
	u32 start = 0U;
	u32 mid = 0U;
	u32 end = count - 1U;
	bool found = false;

	while (start <= end) {
		mid = (start + end) / 2U;
		if (offset_allowlist[mid] == offset) {
			found = true;
			break;
		}

		if (offset < offset_allowlist[mid]) {
			end = mid - 1U;
		} else {
			start = mid + 1U;
		}
	}

	if (found) {
		nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x found in offset allowlist",
			offset);
	} else {
		nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x not found in offset allowlist",
			offset);
	}

	return found;
}
bool nvgpu_profiler_validate_regops_allowlist(struct nvgpu_profiler_object *prof,
		u32 offset, enum nvgpu_pm_resource_hwpm_register_type *type)
{
	enum nvgpu_pm_resource_hwpm_register_type reg_type;
	struct gk20a *g = prof->g;
	const u32 *offset_allowlist;
	u32 count;
	u32 stride;
	bool found;

	found = allowlist_range_search(g, prof->map, prof->map_count, offset, &reg_type);
	if (!found) {
		return found;
	}

	if (type != NULL) {
		*type = reg_type;
	}

	if (reg_type == NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX) {
		return found;
	}

	switch ((u32)reg_type) {
	case NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON:
		offset_allowlist = g->ops.regops.get_hwpm_perfmon_register_offset_allowlist(&count);
		stride = g->ops.regops.get_hwpm_perfmon_register_stride();
		break;

	case NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER:
		offset_allowlist = g->ops.regops.get_hwpm_router_register_offset_allowlist(&count);
		stride = g->ops.regops.get_hwpm_router_register_stride();
		break;

	case NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER:
		offset_allowlist = g->ops.regops.get_hwpm_pma_trigger_register_offset_allowlist(&count);
		stride = g->ops.regops.get_hwpm_pma_trigger_register_stride();
		break;

	case NVGPU_HWPM_REGISTER_TYPE_SMPC:
		offset_allowlist = g->ops.regops.get_smpc_register_offset_allowlist(&count);
		stride = g->ops.regops.get_smpc_register_stride();
		break;

	case NVGPU_HWPM_REGISTER_TYPE_CAU:
		offset_allowlist = g->ops.regops.get_cau_register_offset_allowlist(&count);
		stride = g->ops.regops.get_cau_register_stride();
		break;

	case NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL:
		offset_allowlist = g->ops.regops.get_hwpm_pma_channel_register_offset_allowlist(&count);
		stride = g->ops.regops.get_hwpm_pma_channel_register_stride();
		break;

	default:
		return false;
	}

	offset = offset & (stride - 1U);
	return allowlist_offset_search(g, offset_allowlist, count, offset);
}
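A compact, self-contained model of this whole validation path (ranges, offsets, and the 0x200 stride are hypothetical; signed indices are used here so the searches terminate cleanly when the probe falls below the first element):

    #include <stdbool.h>
    #include <stdio.h>

    struct range { unsigned int start, end; };

    /* Stage 1: binary search over sorted, non-overlapping ranges. */
    static bool in_ranges(const struct range *map, int n, unsigned int offset)
    {
        int lo = 0, hi = n - 1;

        while (lo <= hi) {
            int mid = (lo + hi) / 2;

            if (offset < map[mid].start)
                hi = mid - 1;
            else if (offset > map[mid].end)
                lo = mid + 1;
            else
                return true;
        }
        return false;
    }

    /* Stage 2: binary search in a sorted per-instance offset allowlist. */
    static bool in_allowlist(const unsigned int *wl, int n, unsigned int offset)
    {
        int lo = 0, hi = n - 1;

        while (lo <= hi) {
            int mid = (lo + hi) / 2;

            if (wl[mid] == offset)
                return true;
            if (offset < wl[mid])
                hi = mid - 1;
            else
                lo = mid + 1;
        }
        return false;
    }

    int main(void)
    {
        /* Hypothetical perfmon range and allowlist; stride 0x200. */
        const struct range map[] = { { 0x240000u, 0x247fffu } };
        const unsigned int wl[] = { 0x0u, 0x4u, 0x20u };
        unsigned int offset = 0x240204u;

        bool ok = in_ranges(map, 1, offset);
        if (ok) {
            /* Mask down to the offset within one unit instance:
             * 0x240204 & (0x200 - 1) = 0x004, which is allowlisted.
             */
            ok = in_allowlist(wl, 3, offset & (0x200u - 1u));
        }
        printf("offset 0x%x %s\n", offset, ok ? "allowed" : "rejected");
        return 0;
    }

Note that the masking step only works because each per-type stride is a power of two, which makes offset & (stride - 1U) equivalent to offset % stride.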
@@ -30,6 +30,7 @@
#include <nvgpu/regops.h>
#include <nvgpu/gr/obj_ctx.h>
#include <nvgpu/gr/gr_utils.h>
#include <nvgpu/profiler.h>

/* Access ctx buffer offset functions in gr_gk20a.h */
#include "hal/gr/gr/gr_gk20a.h"
@@ -78,6 +79,7 @@ static bool gr_context_info_available(struct gk20a *g)
}

static bool validate_reg_ops(struct gk20a *g,
		struct nvgpu_profiler_object *prof,
		u32 *ctx_rd_count, u32 *ctx_wr_count,
		struct nvgpu_dbg_reg_op *ops,
		u32 op_count,
@@ -86,6 +88,7 @@ static bool validate_reg_ops(struct gk20a *g,

int exec_regops_gk20a(struct gk20a *g,
		struct nvgpu_tsg *tsg,
		struct nvgpu_profiler_object *prof,
		struct nvgpu_dbg_reg_op *ops,
		u32 num_ops,
		u32 *flags)
@@ -99,7 +102,7 @@ int exec_regops_gk20a(struct gk20a *g,

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");

-	ok = validate_reg_ops(g, &ctx_rd_count, &ctx_wr_count,
+	ok = validate_reg_ops(g, prof, &ctx_rd_count, &ctx_wr_count,
			ops, num_ops, tsg != NULL, flags);
	if (!ok) {
		nvgpu_err(g, "invalid op(s)");
@@ -318,6 +321,36 @@ static bool check_whitelists(struct gk20a *g,
	return valid;
}

static int profiler_obj_validate_reg_op_offset(struct nvgpu_profiler_object *prof,
		struct nvgpu_dbg_reg_op *op)
{
	struct gk20a *g = prof->g;
	bool valid = false;
	u32 offset;

	op->status = 0;
	offset = op->offset;

	/* support only 24-bit 4-byte aligned offsets */
	if ((offset & 0xFF000003U) != 0U) {
		nvgpu_err(g, "invalid regop offset: 0x%x", offset);
		op->status |= REGOP(STATUS_INVALID_OFFSET);
		return -EINVAL;
	}

	valid = nvgpu_profiler_validate_regops_allowlist(prof, offset, NULL);
	if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) {
		valid = nvgpu_profiler_validate_regops_allowlist(prof, offset + 4U, NULL);
	}

	if (!valid) {
		op->status |= REGOP(STATUS_INVALID_OFFSET);
		return -EINVAL;
	}

	return 0;
}

/* note: the op here has already been through validate_reg_op_info */
static int validate_reg_op_offset(struct gk20a *g,
		struct nvgpu_dbg_reg_op *op,
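The 0xFF000003U mask rejects two things at once: any offset with bits above the low 24 set, and any offset that is not 4-byte aligned. Worked examples:

    0x00240204 & 0xFF000003 = 0x0        -> passes, goes on to the allowlist check
    0x00240206 & 0xFF000003 = 0x2        -> rejected (not 4-byte aligned)
    0x01240204 & 0xFF000003 = 0x01000000 -> rejected (beyond 24 bits)

For 64-bit regops, both 32-bit halves (offset and offset + 4) must be allowlisted.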
@@ -377,6 +410,7 @@ static int validate_reg_op_offset(struct gk20a *g,
}

static bool validate_reg_ops(struct gk20a *g,
		struct nvgpu_profiler_object *prof,
		u32 *ctx_rd_count, u32 *ctx_wr_count,
		struct nvgpu_dbg_reg_op *ops,
		u32 op_count,
@@ -419,6 +453,14 @@ static bool validate_reg_ops(struct gk20a *g,

		/* if "allow_all" flag enabled, dont validate offset */
		if (!g->allow_all) {
			if (prof != NULL) {
				if (profiler_obj_validate_reg_op_offset(prof, &ops[i]) != 0) {
					op_failed = true;
					if (all_or_none) {
						break;
					}
				}
			} else {
				if (validate_reg_op_offset(g, &ops[i], valid_ctx) != 0) {
					op_failed = true;
					if (all_or_none) {
@@ -427,6 +469,7 @@ static bool validate_reg_ops(struct gk20a *g,
				}
			}
		}
	}

	nvgpu_log(g, gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
		*ctx_wr_count, *ctx_rd_count);
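As a side note, a minimal runnable model of the batch policy this loop implements (names simplified; all_or_none mirrors the driver's all-or-none flag, while the alternative mode marks the failed op and keeps going):

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        bool all_or_none = true;
        bool valid[] = { true, false, true }; /* per-op validation results */
        bool op_failed = false;

        for (int i = 0; i < 3; i++) {
            if (!valid[i]) {
                op_failed = true;
                if (all_or_none)
                    break;  /* first failure rejects the whole batch */
                /* else: op keeps an error status; batch continues */
            }
        }
        printf("batch %s\n", op_failed ? "rejected" : "executed");
        return 0;
    }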
@@ -36,6 +36,7 @@

int vgpu_exec_regops(struct gk20a *g,
		struct nvgpu_tsg *tsg,
		struct nvgpu_profiler_object *prof,
		struct nvgpu_dbg_reg_op *ops,
		u32 num_ops,
		u32 *flags)
@@ -31,6 +31,7 @@ struct nvgpu_channel;

int vgpu_exec_regops(struct gk20a *g,
		struct nvgpu_tsg *tsg,
		struct nvgpu_profiler_object *prof,
		struct nvgpu_dbg_reg_op *ops,
		u32 num_ops,
		u32 *flags);
@@ -1489,6 +1489,15 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
			break;
		}

		/*
		 * Move to next op if current op is invalid.
		 * Execution will reach here only if CONTINUE_ON_ERROR
		 * mode is requested.
		 */
		if (ctx_ops[i].status != REGOP(STATUS_SUCCESS)) {
			continue;
		}

		/* only do ctx ops and only on the right pass */
		if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
		    (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
@@ -89,6 +89,7 @@
#include "hal/ptimer/ptimer_gp10b.h"
#ifdef CONFIG_NVGPU_DEBUGGER
#include "hal/regops/regops_gv11b.h"
#include "hal/regops/allowlist_gv11b.h"
#endif
#ifdef CONFIG_NVGPU_RECOVERY
#include "hal/rc/rc_gv11b.h"
@@ -1169,6 +1170,29 @@ static const struct gops_regops gv11b_ops_regops = {
	.get_context_whitelist_ranges_count = gv11b_get_context_whitelist_ranges_count,
	.get_runcontrol_whitelist = gv11b_get_runcontrol_whitelist,
	.get_runcontrol_whitelist_count = gv11b_get_runcontrol_whitelist_count,
	.get_hwpm_perfmon_register_stride = gv11b_get_hwpm_perfmon_register_stride,
	.get_hwpm_router_register_stride = gv11b_get_hwpm_router_register_stride,
	.get_hwpm_pma_channel_register_stride = gv11b_get_hwpm_pma_channel_register_stride,
	.get_hwpm_pma_trigger_register_stride = gv11b_get_hwpm_pma_trigger_register_stride,
	.get_smpc_register_stride = gv11b_get_smpc_register_stride,
	.get_cau_register_stride = NULL,
	.get_hwpm_perfmon_register_offset_allowlist =
		gv11b_get_hwpm_perfmon_register_offset_allowlist,
	.get_hwpm_router_register_offset_allowlist =
		gv11b_get_hwpm_router_register_offset_allowlist,
	.get_hwpm_pma_channel_register_offset_allowlist =
		gv11b_get_hwpm_pma_channel_register_offset_allowlist,
	.get_hwpm_pma_trigger_register_offset_allowlist =
		gv11b_get_hwpm_pma_trigger_register_offset_allowlist,
	.get_smpc_register_offset_allowlist = gv11b_get_smpc_register_offset_allowlist,
	.get_cau_register_offset_allowlist = NULL,
	.get_hwpm_perfmon_register_ranges = gv11b_get_hwpm_perfmon_register_ranges,
	.get_hwpm_router_register_ranges = gv11b_get_hwpm_router_register_ranges,
	.get_hwpm_pma_channel_register_ranges = gv11b_get_hwpm_pma_channel_register_ranges,
	.get_hwpm_pma_trigger_register_ranges = gv11b_get_hwpm_pma_trigger_register_ranges,
	.get_smpc_register_ranges = gv11b_get_smpc_register_ranges,
	.get_cau_register_ranges = NULL,
	.get_hwpm_perfmux_register_ranges = gv11b_get_hwpm_perfmux_register_ranges,
};
#endif
@@ -74,6 +74,7 @@
#include "hal/ptimer/ptimer_gk20a.h"
#include "hal/ptimer/ptimer_gp10b.h"
#include "hal/regops/regops_tu104.h"
#include "hal/regops/allowlist_tu104.h"
#include "hal/func/func_tu104.h"
#include "hal/fuse/fuse_gm20b.h"
#include "hal/fuse/fuse_gp10b.h"
@@ -1232,6 +1233,29 @@ static const struct gops_regops tu104_ops_regops = {
	.get_context_whitelist_ranges_count = tu104_get_context_whitelist_ranges_count,
	.get_runcontrol_whitelist = tu104_get_runcontrol_whitelist,
	.get_runcontrol_whitelist_count = tu104_get_runcontrol_whitelist_count,
	.get_hwpm_perfmon_register_stride = tu104_get_hwpm_perfmon_register_stride,
	.get_hwpm_router_register_stride = tu104_get_hwpm_router_register_stride,
	.get_hwpm_pma_channel_register_stride = tu104_get_hwpm_pma_channel_register_stride,
	.get_hwpm_pma_trigger_register_stride = tu104_get_hwpm_pma_trigger_register_stride,
	.get_smpc_register_stride = tu104_get_smpc_register_stride,
	.get_cau_register_stride = tu104_get_cau_register_stride,
	.get_hwpm_perfmon_register_offset_allowlist =
		tu104_get_hwpm_perfmon_register_offset_allowlist,
	.get_hwpm_router_register_offset_allowlist =
		tu104_get_hwpm_router_register_offset_allowlist,
	.get_hwpm_pma_channel_register_offset_allowlist =
		tu104_get_hwpm_pma_channel_register_offset_allowlist,
	.get_hwpm_pma_trigger_register_offset_allowlist =
		tu104_get_hwpm_pma_trigger_register_offset_allowlist,
	.get_smpc_register_offset_allowlist = tu104_get_smpc_register_offset_allowlist,
	.get_cau_register_offset_allowlist = tu104_get_cau_register_offset_allowlist,
	.get_hwpm_perfmon_register_ranges = tu104_get_hwpm_perfmon_register_ranges,
	.get_hwpm_router_register_ranges = tu104_get_hwpm_router_register_ranges,
	.get_hwpm_pma_channel_register_ranges = tu104_get_hwpm_pma_channel_register_ranges,
	.get_hwpm_pma_trigger_register_ranges = tu104_get_hwpm_pma_trigger_register_ranges,
	.get_smpc_register_ranges = tu104_get_smpc_register_ranges,
	.get_cau_register_ranges = tu104_get_cau_register_ranges,
	.get_hwpm_perfmux_register_ranges = tu104_get_hwpm_perfmux_register_ranges,
};
#endif
@@ -26,6 +26,7 @@
struct gops_regops {
	int (*exec_regops)(struct gk20a *g,
			struct nvgpu_tsg *tsg,
			struct nvgpu_profiler_object *prof,
			struct nvgpu_dbg_reg_op *ops,
			u32 num_ops,
			u32 *flags);
@@ -37,6 +38,32 @@ struct gops_regops {
	u64 (*get_context_whitelist_ranges_count)(void);
	const u32* (*get_runcontrol_whitelist)(void);
	u64 (*get_runcontrol_whitelist_count)(void);
	u32 (*get_hwpm_perfmon_register_stride)(void);
	u32 (*get_hwpm_router_register_stride)(void);
	u32 (*get_hwpm_pma_channel_register_stride)(void);
	u32 (*get_hwpm_pma_trigger_register_stride)(void);
	u32 (*get_smpc_register_stride)(void);
	u32 (*get_cau_register_stride)(void);
	const u32 *(*get_hwpm_perfmon_register_offset_allowlist)(u32 *count);
	const u32 *(*get_hwpm_router_register_offset_allowlist)(u32 *count);
	const u32 *(*get_hwpm_pma_channel_register_offset_allowlist)(u32 *count);
	const u32 *(*get_hwpm_pma_trigger_register_offset_allowlist)(u32 *count);
	const u32 *(*get_smpc_register_offset_allowlist)(u32 *count);
	const u32 *(*get_cau_register_offset_allowlist)(u32 *count);
	const struct nvgpu_pm_resource_register_range *
		(*get_hwpm_perfmon_register_ranges)(u32 *count);
	const struct nvgpu_pm_resource_register_range *
		(*get_hwpm_router_register_ranges)(u32 *count);
	const struct nvgpu_pm_resource_register_range *
		(*get_hwpm_pma_channel_register_ranges)(u32 *count);
	const struct nvgpu_pm_resource_register_range *
		(*get_hwpm_pma_trigger_register_ranges)(u32 *count);
	const struct nvgpu_pm_resource_register_range *
		(*get_smpc_register_ranges)(u32 *count);
	const struct nvgpu_pm_resource_register_range *
		(*get_cau_register_ranges)(u32 *count);
	const struct nvgpu_pm_resource_register_range *
		(*get_hwpm_perfmux_register_ranges)(u32 *count);
};
struct gops_debugger {
	void (*post_events)(struct nvgpu_channel *ch);
@@ -32,6 +32,8 @@
struct gk20a;
struct nvgpu_channel;
struct nvgpu_tsg;
struct nvgpu_pm_resource_register_range_map;
enum nvgpu_pm_resource_hwpm_register_type;

struct nvgpu_profiler_object {
	struct gk20a *g;
@@ -106,6 +108,15 @@ struct nvgpu_profiler_object {
	 * (if PMA stream resource is reserved successfully).
	 */
	void *pma_bytes_available_buffer_cpuva;

	/*
	 * Dynamic map of HWPM register ranges that can be accessed
	 * through regops.
	 */
	struct nvgpu_pm_resource_register_range_map *map;

	/* Number of range entries in map above */
	u32 map_count;
};

static inline struct nvgpu_profiler_object *
@@ -135,5 +146,8 @@ int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof);
int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof);
void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof);

bool nvgpu_profiler_validate_regops_allowlist(struct nvgpu_profiler_object *prof,
		u32 offset, enum nvgpu_pm_resource_hwpm_register_type *type);

#endif /* CONFIG_NVGPU_PROFILER */
#endif /* NVGPU_PROFILER_H */
@@ -30,6 +30,7 @@

struct gk20a;
struct nvgpu_tsg;
struct nvgpu_profiler_object;

/*
 * Register operations
@@ -88,6 +89,7 @@ struct regop_offset_range {

int exec_regops_gk20a(struct gk20a *g,
		struct nvgpu_tsg *tsg,
		struct nvgpu_profiler_object *prof,
		struct nvgpu_dbg_reg_op *ops,
		u32 num_ops,
		u32 *flags);
@@ -30,4 +30,20 @@ struct nvgpu_pm_resource_register_range {
	u32 end;
};

enum nvgpu_pm_resource_hwpm_register_type {
	NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON,
	NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER,
	NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER,
	NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX,
	NVGPU_HWPM_REGISTER_TYPE_SMPC,
	NVGPU_HWPM_REGISTER_TYPE_CAU,
	NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL,
};

struct nvgpu_pm_resource_register_range_map {
	u32 start;
	u32 end;
	enum nvgpu_pm_resource_hwpm_register_type type;
};

#endif /* NVGPU_REGOPS_ALLOWLIST_H */
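As a usage sketch, a chip HAL built on these types would typically expose static range tables plus small accessors, along the following lines (the chip prefix, offsets, and table contents are hypothetical, not the actual gv11b/tu104 tables):

    #include <nvgpu/regops_allowlist.h>
    #include <nvgpu/types.h>

    /* Hypothetical SMPC range table for some chip. */
    static const struct nvgpu_pm_resource_register_range
    chip_smpc_register_ranges[] = {
        { .start = 0x00140000U, .end = 0x001401ffU },
    };

    const struct nvgpu_pm_resource_register_range *
    chip_get_smpc_register_ranges(u32 *count)
    {
        *count = (u32)(sizeof(chip_smpc_register_ranges) /
                sizeof(chip_smpc_register_ranges[0]));
        return chip_smpc_register_ranges;
    }

Such an accessor would then be hooked up via .get_smpc_register_ranges = chip_get_smpc_register_ranges in that chip's gops_regops, exactly as the gv11b and tu104 blocks above do with their own tables.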
@@ -875,7 +875,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
		if (err)
			break;

-		err = g->ops.regops.exec_regops(g, tsg,
+		err = g->ops.regops.exec_regops(g, tsg, NULL,
				g->dbg_regops_tmp_buf, num_ops, &flags);

		if (err) {
@@ -620,7 +620,7 @@ static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv
		flags &= ~NVGPU_REG_OP_FLAG_ALL_PASSED;
	}

-	err = g->ops.regops.exec_regops(g, tsg,
+	err = g->ops.regops.exec_regops(g, tsg, prof,
			priv->regops_staging_buf, num_ops,
			&flags);
	if (err) {