Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: use HAL for chiplet offset
We currently use hard-coded values of NV_PERF_PMMGPC_CHIPLET_OFFSET and
NV_PMM_FBP_STRIDE, which are incorrect for Volta.

Add a new GR HAL get_pmm_per_chiplet_offset() to get the correct value
per chip:
Set gr_gm20b_get_pmm_per_chiplet_offset() for older chips.
Set gr_gv11b_get_pmm_per_chiplet_offset() for Volta.

Use the HAL instead of hard-coded values wherever required.

Bug 200398811
Jira NVGPU-556

Change-Id: I947e7febd4f84fae740a1bc74f99d72e1df523aa
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1690028
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit 78151bb6f9
parent 19aa748be5
committed by: mobile promotions
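For context before the diff: nvgpu selects chip-specific behavior through the gpu_ops HAL table that each chip's HAL init file wires up at probe time; common code then dispatches through the function pointer. Below is a minimal standalone C sketch of that dispatch pattern. The struct names, the gv11b return value, and main() are illustrative stand-ins only; get_pmm_per_chiplet_offset and the two per-chip implementation names are the ones this change actually adds.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* Trimmed stand-in for the .gr sub-table of nvgpu's struct gpu_ops */
struct gr_ops {
	u32 (*get_pmm_per_chiplet_offset)(void);
};

/* Trimmed stand-in for struct gk20a */
struct gk20a_sketch {
	struct gr_ops gr;
};

/* Per-chip implementations: the real ones compute
 * perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1 from the chip's
 * generated hw_perf_*.h header; the Volta value below is a placeholder. */
static u32 gm20b_get_pmm_per_chiplet_offset(void) { return 0x1000; }
static u32 gv11b_get_pmm_per_chiplet_offset(void) { return 0x0800; /* placeholder */ }

int main(void)
{
	/* hal_gm20b.c-style wiring for a pre-Volta chip */
	struct gk20a_sketch g = {
		.gr = { .get_pmm_per_chiplet_offset = gm20b_get_pmm_per_chiplet_offset },
	};

	/* Common code dispatches through the HAL instead of using the
	 * removed NV_PERF_PMMGPC_CHIPLET_OFFSET constant. */
	u32 gpc_num = 2;
	printf("GPC%u base: 0x%x\n", gpc_num,
	       g.gr.get_pmm_per_chiplet_offset() * gpc_num);

	(void)gv11b_get_pmm_per_chiplet_offset; /* Volta variant, wired in the gv11b HAL */
	return 0;
}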
@@ -470,6 +470,7 @@ struct gpu_ops {
 			u32 addr,
 			u32 *priv_addr_table,
 			u32 *num_registers);
+		u32 (*get_pmm_per_chiplet_offset)(void);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);

@@ -63,9 +63,7 @@
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
 
 #define BLK_SIZE (256)
-#define NV_PMM_FBP_STRIDE 0x1000
 #define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200
-#define NV_PERF_PMMGPC_CHIPLET_OFFSET 0x1000
 #define NV_PERF_PMMGPCROUTER_STRIDE 0x0200
 #define NV_PCFG_BASE 0x00088000
 #define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020
@@ -7563,7 +7561,7 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
 				count, offset, max_cnt, base, ~0))
 			return -EINVAL;
 
-		base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num);
+		base = (g->ops.gr.get_pmm_per_chiplet_offset() * gpc_num);
 		if (add_ctxsw_buffer_map_entries(map,
 			&g->gr.ctx_vars.ctxsw_regs.perf_gpc,
 			count, offset, max_cnt, base, ~0))
@@ -7703,7 +7701,9 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
 			&g->gr.ctx_vars.ctxsw_regs.fbp,
 			&count, &offset,
 			hwpm_ctxsw_reg_count_max, 0,
-			g->gr.num_fbps, NV_PMM_FBP_STRIDE, ~0))
+			g->gr.num_fbps,
+			g->ops.gr.get_pmm_per_chiplet_offset(),
+			~0))
 		goto cleanup;
 
 	/* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */

@@ -1,7 +1,7 @@
 /*
  * GM20B GPC MMU
  *
- * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -41,6 +41,7 @@
 #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
+#include <nvgpu/hw/gm20b/hw_perf_gm20b.h>
 
 void gr_gm20b_init_gpc_mmu(struct gk20a *g)
 {
@@ -1541,3 +1542,8 @@ void gm20a_gr_disable_rd_coalesce(struct gk20a *g)
 
 	gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg);
 }
+
+u32 gr_gm20b_get_pmm_per_chiplet_offset(void)
+{
+	return (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1);
+}

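gr_gm20b_get_pmm_per_chiplet_offset() above returns perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1. With the GM20B values added to hw_perf_gm20b.h later in this diff (0x001b0fffU and 0x001b0000U) that is 0x1000, matching the old hard-coded NV_PERF_PMMGPC_CHIPLET_OFFSET and NV_PMM_FBP_STRIDE, so pre-Volta behavior is unchanged. A quick standalone check, using local copies of the two accessors with values taken from the hw_perf_gm20b.h hunk below:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Local copies of the GM20B accessors from hw_perf_gm20b.h (this diff) */
static inline uint32_t perf_pmmsys_base_v(void)   { return 0x001b0000U; }
static inline uint32_t perf_pmmsys_extent_v(void) { return 0x001b0fffU; }

int main(void)
{
	uint32_t off = perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1;

	/* 0x001b0fff - 0x001b0000 + 1 == 0x1000: the PMM system window is
	 * 4 KB, the same stride the removed #defines hard-coded. */
	assert(off == 0x1000);
	printf("per-chiplet offset: 0x%x\n", off);
	return 0;
}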
@@ -1,7 +1,7 @@
 /*
  * GM20B GPC MMU
  *
- * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -135,4 +135,5 @@ void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
 			u32 *priv_addr_table_index);
 void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 			u32 global_esr);
+u32 gr_gm20b_get_pmm_per_chiplet_offset(void);
 #endif

@@ -322,6 +322,8 @@ static const struct gpu_ops gm20b_ops = {
 		.add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
 		.decode_priv_addr = gr_gk20a_decode_priv_addr,
 		.create_priv_addr_table = gr_gk20a_create_priv_addr_table,
+		.get_pmm_per_chiplet_offset =
+			gr_gm20b_get_pmm_per_chiplet_offset,
 	},
 	.fb = {
 		.reset = fb_gk20a_reset,

@@ -385,6 +385,8 @@ static const struct gpu_ops gp106_ops = {
 		.add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
 		.decode_priv_addr = gr_gk20a_decode_priv_addr,
 		.create_priv_addr_table = gr_gk20a_create_priv_addr_table,
+		.get_pmm_per_chiplet_offset =
+			gr_gm20b_get_pmm_per_chiplet_offset,
 	},
 	.fb = {
 		.reset = gp106_fb_reset,

@@ -353,6 +353,8 @@ static const struct gpu_ops gp10b_ops = {
 		.add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
 		.decode_priv_addr = gr_gk20a_decode_priv_addr,
 		.create_priv_addr_table = gr_gk20a_create_priv_addr_table,
+		.get_pmm_per_chiplet_offset =
+			gr_gm20b_get_pmm_per_chiplet_offset,
 	},
 	.fb = {
 		.reset = fb_gk20a_reset,

@@ -432,6 +432,8 @@ static const struct gpu_ops gv100_ops = {
 		.add_ctxsw_reg_perf_pma = gr_gv100_add_ctxsw_reg_perf_pma,
 		.decode_priv_addr = gr_gv11b_decode_priv_addr,
 		.create_priv_addr_table = gr_gv11b_create_priv_addr_table,
+		.get_pmm_per_chiplet_offset =
+			gr_gv11b_get_pmm_per_chiplet_offset,
 	},
 	.fb = {
 		.reset = gv100_fb_reset,

@@ -4513,10 +4513,16 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr,
 	return 0;
 }
 
-static u32 gr_gv11b_pri_pmmgpc_addr(u32 gpc_num, u32 domain_idx, u32 offset)
+u32 gr_gv11b_get_pmm_per_chiplet_offset(void)
+{
+	return (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1);
+}
+
+static u32 gr_gv11b_pri_pmmgpc_addr(struct gk20a *g, u32 gpc_num,
+	u32 domain_idx, u32 offset)
 {
 	return perf_pmmgpc_base_v() +
-		(gpc_num * (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1)) +
+		(gpc_num * g->ops.gr.get_pmm_per_chiplet_offset()) +
 		(domain_idx * perf_pmmgpc_perdomain_offset_v()) +
 		offset;
 }
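For reference, the rewritten gr_gv11b_pri_pmmgpc_addr() above composes a PM register address as: base of the GPC PMM aperture, plus one chiplet-sized stride per GPC (now fetched through the HAL), plus a per-domain stride within the chiplet, plus the register offset within the domain. A standalone sketch of that same composition with the strides passed as parameters; the constants in main() are illustrative placeholders, not real gv11b register values:

#include <stdint.h>
#include <stdio.h>

/* Shape of the per-GPC PM address computation after this change. */
static uint32_t pri_pmmgpc_addr(uint32_t pmmgpc_base,
				uint32_t per_chiplet_offset,
				uint32_t perdomain_offset,
				uint32_t gpc_num, uint32_t domain_idx,
				uint32_t offset)
{
	return pmmgpc_base +
	       (gpc_num * per_chiplet_offset) +   /* chiplet stride via HAL */
	       (domain_idx * perdomain_offset) +  /* PM domain within the chiplet */
	       offset;                            /* register within the domain */
}

int main(void)
{
	/* e.g. GPC1, domain 3, register offset 0x10 (placeholder strides) */
	printf("0x%x\n",
	       pri_pmmgpc_addr(0x00180000U, 0x1000U, 0x200U, 1, 3, 0x10));
	return 0;
}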
@@ -4531,8 +4537,7 @@ static void gr_gv11b_split_pmm_fbp_broadcast_address(struct gk20a *g,
 
 	for (fbp_num = 0; fbp_num < g->gr.num_fbps; fbp_num++) {
 		base = perf_pmmfbp_base_v() +
-			(fbp_num *
-			(perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1));
+			(fbp_num * g->ops.gr.get_pmm_per_chiplet_offset());
 
 		for (domain_idx = domain_start;
 			domain_idx < (domain_start + num_domains);
@@ -4653,7 +4658,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
 			domain_idx < (pmm_domain_start + num_domains);
 			domain_idx++) {
 			priv_addr_table[t++] =
-				gr_gv11b_pri_pmmgpc_addr(gpc_num,
+				gr_gv11b_pri_pmmgpc_addr(g, gpc_num,
 					domain_idx, offset);
 		}
 	}

@@ -234,6 +234,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 int gr_gv11b_handle_ssync_hww(struct gk20a *g);
 u32 gv11b_gr_sm_offset(struct gk20a *g, u32 sm);
 
+u32 gr_gv11b_get_pmm_per_chiplet_offset(void);
 int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr,
 			int *addr_type,
 			u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,

@@ -405,6 +405,8 @@ static const struct gpu_ops gv11b_ops = {
 		.add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
 		.decode_priv_addr = gr_gv11b_decode_priv_addr,
 		.create_priv_addr_table = gr_gv11b_create_priv_addr_table,
+		.get_pmm_per_chiplet_offset =
+			gr_gv11b_get_pmm_per_chiplet_offset,
 	},
 	.fb = {
 		.reset = gv11b_fb_reset,

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -56,6 +56,14 @@
 #ifndef _hw_perf_gm20b_h_
 #define _hw_perf_gm20b_h_
 
+static inline u32 perf_pmmsys_base_v(void)
+{
+	return 0x001b0000U;
+}
+static inline u32 perf_pmmsys_extent_v(void)
+{
+	return 0x001b0fffU;
+}
 static inline u32 perf_pmasys_control_r(void)
 {
 	return 0x001b4000U;

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -56,6 +56,14 @@
 #ifndef _hw_perf_gp106_h_
 #define _hw_perf_gp106_h_
 
+static inline u32 perf_pmmsys_base_v(void)
+{
+	return 0x001b0000U;
+}
+static inline u32 perf_pmmsys_extent_v(void)
+{
+	return 0x001b0fffU;
+}
 static inline u32 perf_pmasys_control_r(void)
 {
 	return 0x001b4000U;

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -56,6 +56,14 @@
 #ifndef _hw_perf_gp10b_h_
 #define _hw_perf_gp10b_h_
 
+static inline u32 perf_pmmsys_base_v(void)
+{
+	return 0x001b0000U;
+}
+static inline u32 perf_pmmsys_extent_v(void)
+{
+	return 0x001b0fffU;
+}
 static inline u32 perf_pmasys_control_r(void)
 {
 	return 0x001b4000U;

@@ -227,6 +227,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
 		.decode_priv_addr = gr_gk20a_decode_priv_addr,
 		.create_priv_addr_table = gr_gk20a_create_priv_addr_table,
+		.get_pmm_per_chiplet_offset =
+			gr_gm20b_get_pmm_per_chiplet_offset,
 	},
 	.fb = {
 		.reset = fb_gk20a_reset,

@@ -263,6 +263,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
 		.decode_priv_addr = gr_gv11b_decode_priv_addr,
 		.create_priv_addr_table = gr_gv11b_create_priv_addr_table,
+		.get_pmm_per_chiplet_offset =
+			gr_gv11b_get_pmm_per_chiplet_offset,
 	},
 	.fb = {
 		.reset = gv11b_fb_reset,