gpu: nvgpu: add broadcast to unicast expansion

Add broadcast to unicast expansion for NV_PLTCG_LTCS_MISC_LTC_PM and
PMM*_[GPC|FBP]SROUTER broadcast registers for non-resident regops.

Bug: 3442801

Change-Id: I88dcf00f4f6e910f0342d3968970070e0248a786
Signed-off-by: atanand <atanand@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2704951
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
atanand
2022-04-29 12:58:37 +00:00
committed by mobile promotions
parent 961925be02
commit 2ebc0bdf98
15 changed files with 110 additions and 5 deletions

View File

@@ -54,6 +54,7 @@
#include "common/gr/gr_priv.h"
#include <nvgpu/hw/gv11b/hw_ltc_gv11b.h>
#include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
#include <nvgpu/hw/gv11b/hw_proj_gv11b.h>
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
@@ -1802,6 +1803,8 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr,
*broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS;
} else if (g->ops.ltc.is_ltcn_ltss_addr(g, addr)) {
*broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS;
} else if (g->ops.ltc.is_pltcg_ltcs_addr(g, addr)) {
*broadcast_flags |= PRI_BROADCAST_FLAGS_PLTCG_LTCS;
}
return 0;
} else if (pri_is_fbpa_addr(g, addr)) {
@@ -1843,6 +1846,16 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr,
PRI_BROADCAST_FLAGS_PMMGPC);
*addr_type = CTXSW_ADDR_TYPE_GPC;
return 0;
} else if (PRI_PMMGS_BASE_ADDR_MASK(addr) == NV_PERF_PMMFBP_FBPS_ROUTER) {
*broadcast_flags |= (PRI_BROADCAST_FLAGS_PMM_FBPS_ROUTER |
PRI_BROADCAST_FLAGS_PMMFBP);
*addr_type = CTXSW_ADDR_TYPE_FBP;
return 0;
} else if (PRI_PMMGS_BASE_ADDR_MASK(addr) == NV_PERF_PMMGPC_GPCS_ROUTER) {
*broadcast_flags |= (PRI_BROADCAST_FLAGS_PMM_GPCS_ROUTER |
PRI_BROADCAST_FLAGS_PMMGPC);
*addr_type = CTXSW_ADDR_TYPE_GPC;
return 0;
} else if (PRI_PMMS_BASE_ADDR_MASK(addr) == NV_PERF_PMMFBP_FBPS) {
*broadcast_flags |= (PRI_BROADCAST_FLAGS_PMM_FBPS |
PRI_BROADCAST_FLAGS_PMMFBP);
@@ -1863,6 +1876,22 @@ static u32 gr_gv11b_pri_pmmgpc_addr(struct gk20a *g, u32 gpc_num,
offset;
}
/*
 * Compute the address of a PMM GPC router register for a single GPC.
 *
 * The result is the PMM GPC router base, advanced by gpc_num times the
 * per-chiplet router offset reported by the perf HAL, plus the caller
 * supplied register offset. Arithmetic is plain u32 (wraps on overflow).
 */
static u32 gr_gv11b_pri_pmmgpcrouter_addr(struct gk20a *g, u32 gpc_num,
		u32 offset)
{
	u32 unicast_addr = perf_pmmgpcrouter_base_v();
	const u32 chiplet_stride =
		g->ops.perf.get_pmmgpcrouter_per_chiplet_offset();

	/* Step to the selected GPC's router aperture. */
	unicast_addr += gpc_num * chiplet_stride;
	/* Then to the requested register inside that aperture. */
	unicast_addr += offset;

	return unicast_addr;
}
/*
 * Compute the address of a PMM FBP router register for a single FBP.
 *
 * The result is the PMM FBP router base, advanced by fbp_num times the
 * per-chiplet router offset reported by the perf HAL, plus the caller
 * supplied register offset. Arithmetic is plain u32 (wraps on overflow).
 */
static u32 gr_gv11b_pri_pmmfbprouter_addr(struct gk20a *g, u32 fbp_num,
		u32 offset)
{
	u32 unicast_addr = perf_pmmfbprouter_base_v();
	const u32 chiplet_stride =
		g->ops.perf.get_pmmfbprouter_per_chiplet_offset();

	/* Step to the selected FBP's router aperture. */
	unicast_addr += fbp_num * chiplet_stride;
	/* Then to the requested register inside that aperture. */
	unicast_addr += offset;

	return unicast_addr;
}
static void gr_gv11b_split_pmm_fbp_broadcast_address(struct gk20a *g,
u32 offset, u32 *priv_addr_table, u32 *t,
u32 domain_start, u32 num_domains)
@@ -2008,6 +2037,19 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
perf_pmmgpc_perdomain_offset_v();
num_domains = 1;
offset = PRI_PMMS_ADDR_MASK(addr);
} else if ((broadcast_flags &
PRI_BROADCAST_FLAGS_PMM_GPCS_ROUTER) != 0U) {
offset = PRI_PMMGS_OFFSET_MASK(addr);
for (gpc_num = 0;
gpc_num < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_num++) {
priv_addr_table[t++] =
gr_gv11b_pri_pmmgpcrouter_addr(g, gpc_num,
offset);
}
*num_registers = t;
return 0;
} else {
return -EINVAL;
}
@@ -2046,6 +2088,18 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
priv_addr_table, &t,
nvgpu_get_litter_value(g, GPU_LIT_PERFMON_PMMFBP_LTC_DOMAIN_START),
nvgpu_get_litter_value(g, GPU_LIT_PERFMON_PMMFBP_LTC_DOMAIN_COUNT));
} else if ((addr_type == CTXSW_ADDR_TYPE_LTCS) &&
((broadcast_flags & PRI_BROADCAST_FLAGS_PLTCG_LTCS) != 0U)) {
u32 num_ltc = g->ltc->ltc_count;
u32 ltc_num;
u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
for (ltc_num = 0; ltc_num < num_ltc; ltc_num =
nvgpu_safe_add_u32(ltc_num, 1U)) {
priv_addr_table[t++] = nvgpu_safe_add_u32(ltc_pltcg_base_v(),
nvgpu_safe_add_u32(nvgpu_safe_mult_u32(ltc_num, ltc_stride),
(addr & nvgpu_safe_sub_u32(ltc_stride, 1U))));
}
} else if ((addr_type == CTXSW_ADDR_TYPE_PMM_FBPGS_ROP) &&
((broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_ROP) != 0U)) {
gr_gv11b_split_pmm_fbp_broadcast_address(g,
@@ -2064,6 +2118,22 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
PRI_PMMS_ADDR_MASK(addr),
priv_addr_table, &t,
domain_start, 1);
} else if ((addr_type == CTXSW_ADDR_TYPE_FBP) &&
((broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPS_ROUTER) != 0U)) {
u32 offset = 0;
u32 fbp_num = 0;
offset = PRI_PMMGS_OFFSET_MASK(addr);
for (fbp_num = 0;
fbp_num < nvgpu_fbp_get_num_fbps(g->fbp);
fbp_num++) {
priv_addr_table[t++] =
gr_gv11b_pri_pmmfbprouter_addr(g, fbp_num,
offset);
}
*num_registers = t;
return 0;
} else if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) == 0U) {
if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) {
for (tpc_num = 0;

View File

@@ -1,7 +1,7 @@
/*
* GK20A Graphics Context Pri Register Addressing
*
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -264,6 +264,9 @@ enum ctxsw_addr_type {
#define PRI_BROADCAST_FLAGS_PMM_FBPGS_LTC BIT32(15)
#define PRI_BROADCAST_FLAGS_PMM_FBPGS_ROP BIT32(16)
#define PRI_BROADCAST_FLAGS_SM BIT32(17)
#define PRI_BROADCAST_FLAGS_PMM_GPCS_ROUTER BIT32(18)
#define PRI_BROADCAST_FLAGS_PMM_FBPS_ROUTER BIT32(19)
#define PRI_BROADCAST_FLAGS_PLTCG_LTCS BIT32(20)
#endif /* CONFIG_NVGPU_DEBUGGER */
#endif /* GR_PRI_GK20A_H */

View File

@@ -1,7 +1,7 @@
/*
* GV11B/GV100 Graphics Context Pri Register Addressing
*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -37,7 +37,9 @@
#define NV_PERF_PMMGPC_GPCGS_GPCTPCA 0x00250000U
#define NV_PERF_PMMGPC_GPCGS_GPCTPCB 0x00250200U
#define NV_PERF_PMMGPC_GPCS 0x00278000U
#define NV_PERF_PMMGPC_GPCS_ROUTER 0x00251800U
#define NV_PERF_PMMFBP_FBPS 0x0027C000U
#define NV_PERF_PMMFBP_FBPS_ROUTER 0x00251A00U
#define PRI_PMMGS_ADDR_WIDTH 9U
#define PRI_PMMS_ADDR_WIDTH 14U

View File

@@ -393,6 +393,7 @@ static const struct gops_ltc ga100_ops_ltc = {
#endif /* CONFIG_NVGPU_GRAPHICS */
#ifdef CONFIG_NVGPU_DEBUGGER
.pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr,
.is_pltcg_ltcs_addr = gm20b_ltc_is_pltcg_ltcs_addr,
.is_ltcs_ltss_addr = gm20b_ltc_is_ltcs_ltss_addr,
.is_ltcn_ltss_addr = gm20b_ltc_is_ltcn_ltss_addr,
.split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr,

View File

@@ -366,6 +366,7 @@ static const struct gops_ltc ga10b_ops_ltc = {
#endif /* CONFIG_NVGPU_GRAPHICS */
#ifdef CONFIG_NVGPU_DEBUGGER
.pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr,
.is_pltcg_ltcs_addr = gm20b_ltc_is_pltcg_ltcs_addr,
.is_ltcs_ltss_addr = gm20b_ltc_is_ltcs_ltss_addr,
.is_ltcn_ltss_addr = gm20b_ltc_is_ltcn_ltss_addr,
.split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr,

View File

@@ -173,6 +173,7 @@ static const struct gops_ltc gm20b_ops_ltc = {
#endif /*CONFIG_NVGPU_GRAPHICS */
#ifdef CONFIG_NVGPU_DEBUGGER
.pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr,
.is_pltcg_ltcs_addr = gm20b_ltc_is_pltcg_ltcs_addr,
.is_ltcs_ltss_addr = gm20b_ltc_is_ltcs_ltss_addr,
.is_ltcn_ltss_addr = gm20b_ltc_is_ltcn_ltss_addr,
.split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr,

View File

@@ -287,6 +287,7 @@ static const struct gops_ltc gv11b_ops_ltc = {
#endif /* CONFIG_NVGPU_GRAPHICS */
#ifdef CONFIG_NVGPU_DEBUGGER
.pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr,
.is_pltcg_ltcs_addr = gm20b_ltc_is_pltcg_ltcs_addr,
.is_ltcs_ltss_addr = gm20b_ltc_is_ltcs_ltss_addr,
.is_ltcn_ltss_addr = gm20b_ltc_is_ltcn_ltss_addr,
.split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr,
@@ -1303,7 +1304,9 @@ static const struct gops_perf gv11b_ops_perf = {
.get_membuf_overflow_status = gv11b_perf_get_membuf_overflow_status,
.get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset,
.get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset,
.get_pmmgpcrouter_per_chiplet_offset = gv11b_perf_get_pmmgpcrouter_per_chiplet_offset,
.get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset,
.get_pmmfbprouter_per_chiplet_offset = gv11b_perf_get_pmmfbprouter_per_chiplet_offset,
.update_get_put = gv11b_perf_update_get_put,
.get_hwpm_sys_perfmon_regs = gv11b_perf_get_hwpm_sys_perfmon_regs,
.get_hwpm_gpc_perfmon_regs = gv11b_perf_get_hwpm_gpc_perfmon_regs,

View File

@@ -332,6 +332,7 @@ static const struct gops_ltc tu104_ops_ltc = {
#endif /* CONFIG_NVGPU_GRAPHICS */
#ifdef CONFIG_NVGPU_DEBUGGER
.pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr,
.is_pltcg_ltcs_addr = gm20b_ltc_is_pltcg_ltcs_addr,
.is_ltcs_ltss_addr = gm20b_ltc_is_ltcs_ltss_addr,
.is_ltcn_ltss_addr = gm20b_ltc_is_ltcn_ltss_addr,
.split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr,
@@ -1373,7 +1374,9 @@ static const struct gops_perf tu104_ops_perf = {
.get_membuf_overflow_status = gv11b_perf_get_membuf_overflow_status,
.get_pmmsys_per_chiplet_offset = gv11b_perf_get_pmmsys_per_chiplet_offset,
.get_pmmgpc_per_chiplet_offset = gv11b_perf_get_pmmgpc_per_chiplet_offset,
.get_pmmgpcrouter_per_chiplet_offset = gv11b_perf_get_pmmgpcrouter_per_chiplet_offset,
.get_pmmfbp_per_chiplet_offset = gv11b_perf_get_pmmfbp_per_chiplet_offset,
.get_pmmfbprouter_per_chiplet_offset = gv11b_perf_get_pmmfbprouter_per_chiplet_offset,
.update_get_put = gv11b_perf_update_get_put,
.get_hwpm_sys_perfmon_regs = tu104_perf_get_hwpm_sys_perfmon_regs,
.get_hwpm_gpc_perfmon_regs = tu104_perf_get_hwpm_gpc_perfmon_regs,

View File

@@ -1,7 +1,7 @@
/*
* GM20B L2
*
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -45,6 +45,7 @@ void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g,
#endif /* CONFIG_NVGPU_GRAPHICS */
#ifdef CONFIG_NVGPU_DEBUGGER
bool gm20b_ltc_pri_is_ltc_addr(struct gk20a *g, u32 addr);
bool gm20b_ltc_is_pltcg_ltcs_addr(struct gk20a *g, u32 addr);
bool gm20b_ltc_is_ltcs_ltss_addr(struct gk20a *g, u32 addr);
bool gm20b_ltc_is_ltcn_ltss_addr(struct gk20a *g, u32 addr);
void gm20b_ltc_split_lts_broadcast_addr(struct gk20a *g, u32 addr,

View File

@@ -1,7 +1,7 @@
/*
* GM20B L2
*
* Copyright (c) 2014-2020 NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -47,6 +47,11 @@ bool gm20b_ltc_pri_is_ltc_addr(struct gk20a *g, u32 addr)
return ((addr >= ltc_pltcg_base_v()) && (addr < ltc_pltcg_extent_v()));
}
/*
 * Report whether addr lies in the half-open range
 * [ltc_pltcg_ltcs_base_v(), ltc_pltcg_extent_v()).
 *
 * The g argument is not consulted by this implementation.
 */
bool gm20b_ltc_is_pltcg_ltcs_addr(struct gk20a *g, u32 addr)
{
	/* Below the PLTCG LTCS base: cannot match. */
	if (addr < ltc_pltcg_ltcs_base_v()) {
		return false;
	}

	/* At or above the base: match only while below the PLTCG extent. */
	return (addr < ltc_pltcg_extent_v());
}
bool gm20b_ltc_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
{
u32 ltc_shared_base = ltc_ltcs_ltss_v();

View File

@@ -170,11 +170,21 @@ u32 gv11b_perf_get_pmmgpc_per_chiplet_offset(void)
return (perf_pmmgpc_extent_v() - perf_pmmgpc_base_v() + 1U);
}
/*
 * Return the per-chiplet offset of the PMM GPC router register space,
 * computed as (extent - base + 1). The +1 implies the extent value is
 * treated as inclusive.
 */
u32 gv11b_perf_get_pmmgpcrouter_per_chiplet_offset(void)
{
	const u32 range_last = perf_pmmgpcrouter_extent_v();
	const u32 range_first = perf_pmmgpcrouter_base_v();

	return (range_last - range_first) + 1U;
}
/*
 * Return the per-chiplet offset of the PMM FBP register space,
 * computed as (extent - base + 1). The +1 implies the extent value is
 * treated as inclusive.
 */
u32 gv11b_perf_get_pmmfbp_per_chiplet_offset(void)
{
	const u32 range_last = perf_pmmfbp_extent_v();
	const u32 range_first = perf_pmmfbp_base_v();

	return (range_last - range_first) + 1U;
}
/*
 * Return the per-chiplet offset of the PMM FBP router register space,
 * computed as (extent - base + 1). The +1 implies the extent value is
 * treated as inclusive.
 */
u32 gv11b_perf_get_pmmfbprouter_per_chiplet_offset(void)
{
	const u32 range_last = perf_pmmfbprouter_extent_v();
	const u32 range_first = perf_pmmfbprouter_base_v();

	return (range_last - range_first) + 1U;
}
static const u32 hwpm_sys_perfmon_regs[] =
{
/* This list is autogenerated. Do not edit. */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -50,7 +50,9 @@ void gv11b_perf_deinit_inst_block(struct gk20a *g);
u32 gv11b_perf_get_pmmsys_per_chiplet_offset(void);
u32 gv11b_perf_get_pmmgpc_per_chiplet_offset(void);
u32 gv11b_perf_get_pmmgpcrouter_per_chiplet_offset(void);
u32 gv11b_perf_get_pmmfbp_per_chiplet_offset(void);
u32 gv11b_perf_get_pmmfbprouter_per_chiplet_offset(void);
const u32 *gv11b_perf_get_hwpm_sys_perfmon_regs(u32 *count);
const u32 *gv11b_perf_get_hwpm_gpc_perfmon_regs(u32 *count);

View File

@@ -244,6 +244,7 @@ static const struct gops_ltc vgpu_ga10b_ops_ltc = {
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
.pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr,
.is_pltcg_ltcs_addr = gm20b_ltc_is_pltcg_ltcs_addr,
.is_ltcs_ltss_addr = gm20b_ltc_is_ltcs_ltss_addr,
.is_ltcn_ltss_addr = gm20b_ltc_is_ltcn_ltss_addr,
.split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr,

View File

@@ -219,6 +219,7 @@ static const struct gops_ltc vgpu_gv11b_ops_ltc = {
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
.pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr,
.is_pltcg_ltcs_addr = gm20b_ltc_is_pltcg_ltcs_addr,
.is_ltcs_ltss_addr = gm20b_ltc_is_ltcs_ltss_addr,
.is_ltcn_ltss_addr = gm20b_ltc_is_ltcn_ltss_addr,
.split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr,

View File

@@ -430,6 +430,7 @@ struct gops_ltc {
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
bool (*pri_is_ltc_addr)(struct gk20a *g, u32 addr);
bool (*is_pltcg_ltcs_addr)(struct gk20a *g, u32 addr);
bool (*is_ltcs_ltss_addr)(struct gk20a *g, u32 addr);
bool (*is_ltcn_ltss_addr)(struct gk20a *g, u32 addr);
void (*split_lts_broadcast_addr)(struct gk20a *g, u32 addr,