Create a new unit common.fbp that initializes FBP support and provides
APIs to retrieve FBP data.

Create a private header with the following data:

  struct nvgpu_fbp {
          u32 num_fbps;
          u32 max_fbps_count;
          u32 fbp_en_mask;
          u32 *fbp_rop_l2_en_mask;
  };

Expose the following public APIs to initialize/remove FBP support:
  nvgpu_fbp_init_support()
  nvgpu_fbp_remove_support()
  vgpu_fbp_init_support() for vGPU

Expose the following APIs to retrieve FBP data (see the sketch after
this list):
  nvgpu_fbp_get_num_fbps()
  nvgpu_fbp_get_max_fbps_count()
  nvgpu_fbp_get_fbp_en_mask()
  nvgpu_fbp_get_rop_l2_en_mask()

Use these APIs to retrieve FBP data throughout the code.

Remove the corresponding fields from struct nvgpu_gr since they are no
longer referenced from that structure.
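A minimal sketch of the interface this message describes (function and
field names come from the message itself; parameter and return types
are assumptions inferred from callers such as
nvgpu_fbp_get_num_fbps(g->fbp) in the file below, not from the actual
header):

  /* sketch only; signatures assumed */
  int nvgpu_fbp_init_support(struct gk20a *g);
  void nvgpu_fbp_remove_support(struct gk20a *g);

  u32 nvgpu_fbp_get_num_fbps(struct nvgpu_fbp *fbp);
  u32 nvgpu_fbp_get_max_fbps_count(struct nvgpu_fbp *fbp);
  u32 nvgpu_fbp_get_fbp_en_mask(struct nvgpu_fbp *fbp);
  u32 *nvgpu_fbp_get_rop_l2_en_mask(struct nvgpu_fbp *fbp);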
Jira NVGPU-3124
Change-Id: I027caf4874b1f6154219f01902020dec4d7b0cb1
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2108617
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>

/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/gk20a.h>
#include <nvgpu/netlist.h>
#include <nvgpu/log.h>
#include <nvgpu/sort.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bsearch.h>
#include <nvgpu/fbp.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/hwpm_map.h>

/* needed for pri_is_ppc_addr_shared */
#include "hal/gr/gr/gr_pri_gk20a.h"

#include "gr_priv.h"

#define NV_PCFG_BASE 0x00088000U
#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200U
#define NV_PERF_PMMGPCROUTER_STRIDE 0x0200U
#define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020U

int nvgpu_gr_hwpm_map_init(struct gk20a *g, struct nvgpu_gr_hwpm_map **hwpm_map,
	u32 size)
{
	struct nvgpu_gr_hwpm_map *tmp_map;

	if (size == 0U) {
		return -EINVAL;
	}

	tmp_map = nvgpu_kzalloc(g, sizeof(*tmp_map));
	if (tmp_map == NULL) {
		return -ENOMEM;
	}

	tmp_map->pm_ctxsw_image_size = size;
	tmp_map->init = false;

	*hwpm_map = tmp_map;

	return 0;
}

void nvgpu_gr_hwpm_map_deinit(struct gk20a *g,
	struct nvgpu_gr_hwpm_map *hwpm_map)
{
	if (hwpm_map->init) {
		nvgpu_big_free(g, hwpm_map->map);
	}

	nvgpu_kfree(g, hwpm_map);
}

u32 nvgpu_gr_hwpm_map_get_size(struct nvgpu_gr_hwpm_map *hwpm_map)
{
	return hwpm_map->pm_ctxsw_image_size;
}

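/*
 * Comparator ordering ctxsw map entries by register address; it is
 * shared by sort() when the map is built in nvgpu_gr_hwpm_map_create()
 * and by nvgpu_bsearch() when an address is looked up in
 * nvgpu_gr_hwmp_map_find_priv_offset().
 */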
static int map_cmp(const void *a, const void *b)
{
	const struct ctxsw_buf_offset_map_entry *e1;
	const struct ctxsw_buf_offset_map_entry *e2;

	e1 = (const struct ctxsw_buf_offset_map_entry *)a;
	e2 = (const struct ctxsw_buf_offset_map_entry *)b;

	if (e1->addr < e2->addr) {
		return -1;
	}

	if (e1->addr > e2->addr) {
		return 1;
	}

	return 0;
}

static int add_ctxsw_buffer_map_entries_pmsys(
	struct ctxsw_buf_offset_map_entry *map,
	struct netlist_aiv_list *regs, u32 *count, u32 *offset,
	u32 max_cnt, u32 base, u32 mask)
{
	u32 idx;
	u32 cnt = *count;
	u32 off = *offset;

	if ((cnt + regs->count) > max_cnt) {
		return -EINVAL;
	}

	for (idx = 0; idx < regs->count; idx++) {
		if ((base + (regs->l[idx].addr & mask)) < 0xFFFU) {
			map[cnt].addr = base + (regs->l[idx].addr & mask)
					+ NV_PCFG_BASE;
		} else {
			map[cnt].addr = base + (regs->l[idx].addr & mask);
		}
		map[cnt++].offset = off;
		off += 4U;
	}
	*count = cnt;
	*offset = off;
	return 0;
}

static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g,
	struct ctxsw_buf_offset_map_entry *map,
	struct netlist_aiv_list *regs,
	u32 *count, u32 *offset,
	u32 max_cnt, u32 base, u32 mask)
{
	u32 idx;
	u32 cnt = *count;
	u32 off = *offset;

	if ((cnt + regs->count) > max_cnt) {
		return -EINVAL;
	}

	/* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1
	 * To handle the case of PPC registers getting added into GPC, the below
	 * code specifically checks for any PPC offsets and adds them using
	 * proper mask
	 */
	for (idx = 0; idx < regs->count; idx++) {
		/* Check if the address is PPC address */
		if (pri_is_ppc_addr_shared(g, regs->l[idx].addr & mask)) {
			u32 ppc_in_gpc_base = nvgpu_get_litter_value(g,
						GPU_LIT_PPC_IN_GPC_BASE);
			u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
						GPU_LIT_PPC_IN_GPC_STRIDE);
			/* Use PPC mask instead of the GPC mask provided */
			u32 ppcmask = ppc_in_gpc_stride - 1U;

			map[cnt].addr = base + ppc_in_gpc_base
					+ (regs->l[idx].addr & ppcmask);
		} else {
			map[cnt].addr = base + (regs->l[idx].addr & mask);
		}
		map[cnt++].offset = off;
		off += 4U;
	}
	*count = cnt;
	*offset = off;
	return 0;
}

static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
	struct netlist_aiv_list *regs,
	u32 *count, u32 *offset,
	u32 max_cnt, u32 base, u32 mask)
{
	u32 idx;
	u32 cnt = *count;
	u32 off = *offset;

	if ((cnt + regs->count) > max_cnt) {
		return -EINVAL;
	}

	for (idx = 0; idx < regs->count; idx++) {
		map[cnt].addr = base + (regs->l[idx].addr & mask);
		map[cnt++].offset = off;
		off += 4U;
	}
	*count = cnt;
	*offset = off;
	return 0;
}

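/*
 * Worked example for the helper above (hypothetical values): a
 * two-entry AIV list with register addresses 0xA04 and 0xA08, mapped
 * with base = 0x100000, mask = ~0U and *offset == 0x40, produces
 *   map[cnt + 0] = { .addr = 0x100A04, .offset = 0x40 }
 *   map[cnt + 1] = { .addr = 0x100A08, .offset = 0x44 }
 * i.e. one 4-byte buffer word per register, in list order.
 */
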
/* Helper function to add register entries to the register map for all
 * subunits
 */
static int add_ctxsw_buffer_map_entries_subunits(
	struct ctxsw_buf_offset_map_entry *map,
	struct netlist_aiv_list *regs,
	u32 *count, u32 *offset,
	u32 max_cnt, u32 base, u32 num_units,
	u32 active_unit_mask, u32 stride, u32 mask)
{
	u32 unit;
	u32 idx;
	u32 cnt = *count;
	u32 off = *offset;

	if ((cnt + (regs->count * num_units)) > max_cnt) {
		return -EINVAL;
	}

	/* Data is interleaved for units in ctxsw buffer */
	for (idx = 0; idx < regs->count; idx++) {
		for (unit = 0; unit < num_units; unit++) {
			if ((active_unit_mask & BIT32(unit)) != 0U) {
				map[cnt].addr = base +
						(regs->l[idx].addr & mask) +
						(unit * stride);
				map[cnt++].offset = off;
				off += 4U;
			}
		}
	}
	*count = cnt;
	*offset = off;
	return 0;
}

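/*
 * Illustration of the interleaving above (hypothetical two registers,
 * two active units): entries are emitted register-major,
 *   REG0/unit0, REG0/unit1, REG1/unit0, REG1/unit1
 * with each unit's address shifted by (unit * stride) and the buffer
 * offset advancing 4 bytes per emitted entry; units cleared in
 * active_unit_mask are skipped and consume no buffer space.
 */
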
static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
	struct ctxsw_buf_offset_map_entry *map,
	u32 *count, u32 *offset, u32 max_cnt,
	struct nvgpu_gr_config *config)
{
	u32 num_gpcs = nvgpu_gr_config_get_gpc_count(config);
	u32 num_ppcs, num_tpcs, gpc_num, base;
	u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
	u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);

	for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) {
		num_tpcs = nvgpu_gr_config_get_gpc_tpc_count(config, gpc_num);
		base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base;
		if (add_ctxsw_buffer_map_entries_subunits(map,
				nvgpu_netlist_get_pm_tpc_ctxsw_regs(g),
				count, offset, max_cnt, base,
				num_tpcs, ~U32(0U), tpc_in_gpc_stride,
				(tpc_in_gpc_stride - 1U)) != 0) {
			return -EINVAL;
		}

		num_ppcs = nvgpu_gr_config_get_gpc_ppc_count(config, gpc_num);
		base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base;
		if (add_ctxsw_buffer_map_entries_subunits(map,
				nvgpu_netlist_get_pm_ppc_ctxsw_regs(g),
				count, offset, max_cnt, base, num_ppcs,
				~U32(0U), ppc_in_gpc_stride,
				(ppc_in_gpc_stride - 1U)) != 0) {
			return -EINVAL;
		}

		base = gpc_base + (gpc_stride * gpc_num);
		if (add_ctxsw_buffer_map_entries_pmgpc(g, map,
				nvgpu_netlist_get_pm_gpc_ctxsw_regs(g),
				count, offset, max_cnt, base,
				(gpc_stride - 1U)) != 0) {
			return -EINVAL;
		}

		base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num;
		if (add_ctxsw_buffer_map_entries(map,
				nvgpu_netlist_get_pm_ucgpc_ctxsw_regs(g),
				count, offset, max_cnt, base, ~U32(0U)) != 0) {
			return -EINVAL;
		}

		base = (g->ops.perf.get_pmm_per_chiplet_offset() * gpc_num);
		if (add_ctxsw_buffer_map_entries(map,
				nvgpu_netlist_get_perf_gpc_ctxsw_regs(g),
				count, offset, max_cnt, base, ~U32(0U)) != 0) {
			return -EINVAL;
		}

		base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num);
		if (add_ctxsw_buffer_map_entries(map,
				nvgpu_netlist_get_gpc_router_ctxsw_regs(g),
				count, offset, max_cnt, base, ~U32(0U)) != 0) {
			return -EINVAL;
		}

		/* Counter Aggregation Unit, if available */
		if (nvgpu_netlist_get_pm_cau_ctxsw_regs(g)->count != 0U) {
			base = gpc_base + (gpc_stride * gpc_num)
					+ tpc_in_gpc_base;
			if (add_ctxsw_buffer_map_entries_subunits(map,
					nvgpu_netlist_get_pm_cau_ctxsw_regs(g),
					count, offset, max_cnt, base, num_tpcs,
					~U32(0U), tpc_in_gpc_stride,
					(tpc_in_gpc_stride - 1U)) != 0) {
				return -EINVAL;
			}
		}

		*offset = ALIGN(*offset, 256);
	}
	return 0;
}

/*
 * PM CTXSW BUFFER LAYOUT :
 *|---------------------------------------------|0x00 <----PM CTXSW BUFFER BASE
 *|                                             |
 *|        LIST_compressed_pm_ctx_reg_SYS       |Space allocated: numRegs words
 *|---------------------------------------------|
 *|                                             |
 *|    LIST_compressed_nv_perf_ctx_reg_SYS      |Space allocated: numRegs words
 *|---------------------------------------------|
 *|                                             |
 *|  LIST_compressed_nv_perf_ctx_reg_sysrouter  |Space allocated: numRegs words
 *|---------------------------------------------|
 *|  PADDING for 256 byte alignment on Volta+   |
 *|---------------------------------------------|<----256 byte aligned
 *|                                             |
 *|    LIST_compressed_nv_perf_ctx_reg_PMA      |Space allocated: numRegs words
 *|---------------------------------------------|
 *|       PADDING for 256 byte alignment        |
 *|---------------------------------------------|<----256 byte aligned
 *|    LIST_compressed_nv_perf_fbp_ctx_regs     |
 *|                                             |Space allocated: numRegs * n words (for n FB units)
 *|---------------------------------------------|
 *| LIST_compressed_nv_perf_fbprouter_ctx_regs  |
 *|                                             |Space allocated: numRegs * n words (for n FB units)
 *|---------------------------------------------|
 *|      LIST_compressed_pm_fbpa_ctx_regs       |
 *|                                             |Space allocated: numRegs * n words (for n FB units)
 *|---------------------------------------------|
 *|       LIST_compressed_pm_rop_ctx_regs       |
 *|---------------------------------------------|
 *|       LIST_compressed_pm_ltc_ctx_regs       |
 *|                                  LTC0 LTS0  |
 *|                                  LTC1 LTS0  |Space allocated: numRegs * n words (for n LTC units)
 *|                                  LTCn LTS0  |
 *|                                  LTC0 LTS1  |
 *|                                  LTC1 LTS1  |
 *|                                  LTCn LTS1  |
 *|                                  LTC0 LTSn  |
 *|                                  LTC1 LTSn  |
 *|                                  LTCn LTSn  |
 *|---------------------------------------------|
 *|       PADDING for 256 byte alignment        |
 *|---------------------------------------------|<----256 byte aligned
 *| GPC0  REG0 TPC0                             |Each GPC has space allocated to accommodate
 *|       REG0 TPC1                             | all the GPC/TPC register lists
 *| Lists in each GPC region:  REG0 TPCn        |Per GPC allocated space is always 256 byte aligned
 *|  LIST_pm_ctx_reg_TPC       REG1 TPC0        |
 *|             * numTpcs      REG1 TPC1        |
 *|  LIST_pm_ctx_reg_PPC       REG1 TPCn        |
 *|             * numPpcs      REGn TPC0        |
 *|  LIST_pm_ctx_reg_GPC       REGn TPC1        |
 *|  List_pm_ctx_reg_uc_GPC    REGn TPCn        |
 *|  LIST_nv_perf_ctx_reg_GPC                   |
 *|  LIST_nv_perf_gpcrouter_ctx_reg             |
 *|  LIST_nv_perf_ctx_reg_CAU                   |
 *|                                       ----  |--
 *| GPC1         .                              |
 *|              .                              |<----
 *|---------------------------------------------|
 *=                                             =
 *| GPCn                                        |
 *=                                             =
 *|---------------------------------------------|
 */

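/*
 * A small arithmetic example for the 256-byte alignment used below
 * (hypothetical size): if the preceding lists end at byte offset
 * 0x1A4, ALIGN(0x1A4, 256) advances the offset to 0x200, matching the
 * "256 byte aligned" boundaries shown in the layout above.
 */
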
static int nvgpu_gr_hwpm_map_create(struct gk20a *g,
	struct nvgpu_gr_hwpm_map *hwpm_map, struct nvgpu_gr_config *config)
{
	u32 hwpm_ctxsw_buffer_size = hwpm_map->pm_ctxsw_image_size;
	struct ctxsw_buf_offset_map_entry *map;
	u32 hwpm_ctxsw_reg_count_max;
	u32 map_size;
	u32 i, count = 0;
	u32 offset = 0;
	int ret;
	u32 active_fbpa_mask;
	u32 num_fbps = nvgpu_fbp_get_num_fbps(g->fbp);
	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
	u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
	u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
	u32 num_ltc = g->ops.top.get_max_ltc_per_fbp(g) *
			g->ops.priv_ring.get_fbp_count(g);

	if (hwpm_ctxsw_buffer_size == 0U) {
		nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
			"no PM Ctxsw buffer memory in context buffer");
		return -EINVAL;
	}

	hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
	map_size = hwpm_ctxsw_reg_count_max * (u32)sizeof(*map);

	map = nvgpu_big_zalloc(g, map_size);
	if (map == NULL) {
		return -ENOMEM;
	}

	/* Add entries from _LIST_pm_ctx_reg_SYS */
	if (add_ctxsw_buffer_map_entries_pmsys(map,
		nvgpu_netlist_get_pm_sys_ctxsw_regs(g),
		&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
		goto cleanup;
	}

	/* Add entries from _LIST_nv_perf_ctx_reg_SYS */
	if (add_ctxsw_buffer_map_entries(map,
		nvgpu_netlist_get_perf_sys_ctxsw_regs(g),
		&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
		goto cleanup;
	}

	/* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/
	if (add_ctxsw_buffer_map_entries(map,
		nvgpu_netlist_get_perf_sys_router_ctxsw_regs(g),
		&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
		goto cleanup;
	}

	if (g->ops.gr.hwpm_map.align_regs_perf_pma) {
		g->ops.gr.hwpm_map.align_regs_perf_pma(&offset);
	}

	/* Add entries from _LIST_nv_perf_pma_ctx_reg*/
	ret = add_ctxsw_buffer_map_entries(map,
		nvgpu_netlist_get_perf_pma_ctxsw_regs(g), &count, &offset,
		hwpm_ctxsw_reg_count_max, 0, ~U32(0U));
	if (ret != 0) {
		goto cleanup;
	}

	offset = ALIGN(offset, 256);

	/* Add entries from _LIST_nv_perf_fbp_ctx_regs */
	if (add_ctxsw_buffer_map_entries_subunits(map,
		nvgpu_netlist_get_fbp_ctxsw_regs(g), &count, &offset,
		hwpm_ctxsw_reg_count_max, 0, num_fbps, ~U32(0U),
		g->ops.perf.get_pmm_per_chiplet_offset(),
		~U32(0U)) != 0) {
		goto cleanup;
	}

	/* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */
	if (add_ctxsw_buffer_map_entries_subunits(map,
		nvgpu_netlist_get_fbp_router_ctxsw_regs(g),
		&count, &offset, hwpm_ctxsw_reg_count_max, 0,
		num_fbps, ~U32(0U), NV_PERF_PMM_FBP_ROUTER_STRIDE,
		~U32(0U)) != 0) {
		goto cleanup;
	}

	if (g->ops.gr.hwpm_map.get_active_fbpa_mask) {
		active_fbpa_mask = g->ops.gr.hwpm_map.get_active_fbpa_mask(g);
	} else {
		active_fbpa_mask = ~U32(0U);
	}

	/* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
	if (add_ctxsw_buffer_map_entries_subunits(map,
		nvgpu_netlist_get_pm_fbpa_ctxsw_regs(g),
		&count, &offset, hwpm_ctxsw_reg_count_max, 0,
		num_fbpas, active_fbpa_mask, fbpa_stride, ~U32(0U))
		!= 0) {
		goto cleanup;
	}

	/* Add entries from _LIST_nv_pm_rop_ctx_regs */
	if (add_ctxsw_buffer_map_entries(map,
		nvgpu_netlist_get_pm_rop_ctxsw_regs(g), &count, &offset,
		hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
		goto cleanup;
	}

	/* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
	if (add_ctxsw_buffer_map_entries_subunits(map,
		nvgpu_netlist_get_pm_ltc_ctxsw_regs(g), &count, &offset,
		hwpm_ctxsw_reg_count_max, 0, num_ltc, ~U32(0U),
		ltc_stride, ~U32(0U)) != 0) {
		goto cleanup;
	}

	offset = ALIGN(offset, 256);

	/* Add GPC entries */
	if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset,
		hwpm_ctxsw_reg_count_max, config) != 0) {
		goto cleanup;
	}

	if (offset > hwpm_ctxsw_buffer_size) {
		nvgpu_err(g, "offset > buffer size");
		goto cleanup;
	}

	sort(map, count, sizeof(*map), map_cmp, NULL);

	hwpm_map->map = map;
	hwpm_map->count = count;
	hwpm_map->init = true;

	nvgpu_log_info(g, "Reg Addr => HWPM Ctxt switch buffer offset");

	for (i = 0; i < count; i++) {
		nvgpu_log_info(g, "%08x => %08x", map[i].addr, map[i].offset);
	}

	return 0;

cleanup:
	nvgpu_err(g, "Failed to create HWPM buffer offset map");
	nvgpu_big_free(g, map);
	return -EINVAL;
}

/*
 * This function will return the 32 bit offset for a priv register if it is
 * present in the PM context buffer.
 */
int nvgpu_gr_hwmp_map_find_priv_offset(struct gk20a *g,
	struct nvgpu_gr_hwpm_map *hwpm_map,
	u32 addr, u32 *priv_offset, struct nvgpu_gr_config *config)
{
	struct ctxsw_buf_offset_map_entry *map, *result, map_key;
	int err = 0;
	u32 count;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);

	/* Create map of pri address and pm offset if necessary */
	if (!hwpm_map->init) {
		err = nvgpu_gr_hwpm_map_create(g, hwpm_map, config);
		if (err != 0) {
			return err;
		}
	}

	*priv_offset = 0;

	map = hwpm_map->map;
	count = hwpm_map->count;

	map_key.addr = addr;
	result = nvgpu_bsearch(&map_key, map, count, sizeof(*map), map_cmp);

	if (result != NULL) {
		*priv_offset = result->offset;
	} else {
		nvgpu_err(g, "Lookup failed for address 0x%x", addr);
		err = -EINVAL;
	}

	return err;
}
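
/*
 * Usage sketch (hypothetical caller such as a regops/profiling path;
 * the gr->hwpm_map and gr->config field names are assumptions, not
 * taken from this file):
 *
 *	u32 offset;
 *	int err = nvgpu_gr_hwmp_map_find_priv_offset(g, gr->hwpm_map,
 *			addr, &offset, gr->config);
 *	if (err == 0) {
 *		// `offset` is the byte offset of register `addr`
 *		// within the PM ctxsw buffer; the address-to-offset
 *		// map is built lazily on the first lookup.
 *	}
 */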