gpu: nvgpu: move get_max_fbps/ltc/lts GR hals to TOP unit

The HALs below, which return the max FBP count, max LTCs per FBP, and
max LTS per LTC values, are currently defined by the GR unit.

g->ops.gr.get_max_fbps_count()
g->ops.gr.get_max_ltc_per_fbp()
g->ops.gr.get_max_lts_per_ltc()

These HALs only read registers declared in the hw_top_*.h h/w headers,
and as such belong to the TOP unit. Move them there as shown below:

g->ops.top.get_max_fbps_count()
g->ops.top.get_max_ltc_per_fbp()
g->ops.top.get_max_lts_per_ltc()

Remove hw_top_*.h h/w header include from gr_gk20a.c and gr_gm20b.c

Jira NVGPU-2894

Change-Id: I995d9f56edb65c9de98d2d15d34ecb72920a65c6
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2030672
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2019-03-01 17:52:38 +05:30
committed by mobile promotions
parent 5785491235
commit fca82e45fb
15 changed files with 68 additions and 68 deletions

View File

@@ -535,7 +535,7 @@ static void gm20b_ltc_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num
u32 *priv_addr_table,
u32 *priv_addr_table_index)
{
u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g);
u32 num_ltc_slices = g->ops.top.get_max_lts_per_ltc(g);
u32 index = *priv_addr_table_index;
u32 lts_num;
u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);

View File

@@ -221,3 +221,27 @@ u32 gm20b_top_get_max_tpc_per_gpc_count(struct gk20a *g)
tmp = nvgpu_readl(g, top_tpc_per_gpc_r());
return top_tpc_per_gpc_value_v(tmp);
}
/*
 * Read the top_num_fbps register and return its value field, which this
 * HAL exposes as the maximum FBP count.
 */
u32 gm20b_top_get_max_fbps_count(struct gk20a *g)
{
	const u32 reg_val = nvgpu_readl(g, top_num_fbps_r());

	return top_num_fbps_value_v(reg_val);
}
/*
 * Read the top_ltc_per_fbp register and return its value field, which this
 * HAL exposes as the maximum number of LTCs per FBP.
 */
u32 gm20b_top_get_max_ltc_per_fbp(struct gk20a *g)
{
	const u32 reg_val = nvgpu_readl(g, top_ltc_per_fbp_r());

	return top_ltc_per_fbp_value_v(reg_val);
}
/*
 * Read the top_slices_per_ltc register and return its value field, which
 * this HAL exposes as the maximum number of LTS (slices) per LTC.
 */
u32 gm20b_top_get_max_lts_per_ltc(struct gk20a *g)
{
	const u32 reg_val = nvgpu_readl(g, top_slices_per_ltc_r());

	return top_slices_per_ltc_value_v(reg_val);
}

View File

@@ -44,4 +44,8 @@ u32 gm20b_get_ce_inst_id(struct gk20a *g, u32 engine_type);
u32 gm20b_top_get_max_gpc_count(struct gk20a *g);
u32 gm20b_top_get_max_tpc_per_gpc_count(struct gk20a *g);
u32 gm20b_top_get_max_fbps_count(struct gk20a *g);
u32 gm20b_top_get_max_ltc_per_fbp(struct gk20a *g);
u32 gm20b_top_get_max_lts_per_ltc(struct gk20a *g);
#endif

View File

@@ -78,7 +78,6 @@
#include <nvgpu/vgpu/ce_vgpu.h>
#include <nvgpu/hw/gp10b/hw_top_gp10b.h>
#include <nvgpu/hw/gp10b/hw_pram_gp10b.h>
#include <nvgpu/hw/gp10b/hw_pwr_gp10b.h>
@@ -148,10 +147,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.dump_gr_regs = NULL,
.update_pc_sampling = vgpu_gr_update_pc_sampling,
.get_fbp_en_mask = vgpu_gr_get_fbp_en_mask,
.get_max_ltc_per_fbp = vgpu_gr_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = vgpu_gr_get_max_lts_per_ltc,
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
.get_max_fbps_count = vgpu_gr_get_max_fbps_count,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.wait_empty = NULL,
.init_cyclestats = vgpu_gr_init_cyclestats,
@@ -673,6 +669,11 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.acr = {
.acr_sw_init = NULL,
},
.top = {
.get_max_fbps_count = vgpu_gr_get_max_fbps_count,
.get_max_ltc_per_fbp = vgpu_gr_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = vgpu_gr_get_max_lts_per_ltc,
},
.chip_init_gpu_characteristics = vgpu_init_gpu_characteristics,
.get_litter_value = gp10b_get_litter_value,
};
@@ -723,6 +724,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g)
gops->fuse = vgpu_gp10b_ops.fuse;
gops->acr = vgpu_gp10b_ops.acr;
gops->top = vgpu_gp10b_ops.top;
/* Lone Functions */
gops->chip_init_gpu_characteristics =

View File

@@ -97,7 +97,6 @@
#include "vgpu_subctx_gv11b.h"
#include "vgpu_tsg_gv11b.h"
#include <nvgpu/hw/gv11b/hw_top_gv11b.h>
#include <nvgpu/hw/gv11b/hw_pwr_gv11b.h>
static const struct gpu_ops vgpu_gv11b_ops = {
@@ -165,10 +164,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.dump_gr_regs = NULL,
.update_pc_sampling = vgpu_gr_update_pc_sampling,
.get_fbp_en_mask = vgpu_gr_get_fbp_en_mask,
.get_max_ltc_per_fbp = vgpu_gr_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = vgpu_gr_get_max_lts_per_ltc,
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
.get_max_fbps_count = vgpu_gr_get_max_fbps_count,
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
.wait_empty = NULL,
.init_cyclestats = vgpu_gr_init_cyclestats,
@@ -745,6 +741,11 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.read_vin_cal_slope_intercept_fuse = NULL,
.read_vin_cal_gain_offset_fuse = NULL,
},
.top = {
.get_max_fbps_count = vgpu_gr_get_max_fbps_count,
.get_max_ltc_per_fbp = vgpu_gr_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = vgpu_gr_get_max_lts_per_ltc,
},
.chip_init_gpu_characteristics = vgpu_gv11b_init_gpu_characteristics,
.get_litter_value = gv11b_get_litter_value,
};
@@ -791,6 +792,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g)
gops->falcon = vgpu_gv11b_ops.falcon;
gops->priv_ring = vgpu_gv11b_ops.priv_ring;
gops->fuse = vgpu_gv11b_ops.fuse;
gops->top = vgpu_gv11b_ops.top;
/* Lone functions */
gops->chip_init_gpu_characteristics =

View File

@@ -67,7 +67,6 @@
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pri_ringmaster_gk20a.h>
#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
#define BLK_SIZE (256U)
#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200U
@@ -2357,8 +2356,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);
tmp = gk20a_readl(g, top_num_fbps_r());
gr->max_fbps_count = top_num_fbps_value_v(tmp);
gr->max_fbps_count = g->ops.top.get_max_fbps_count(g);
gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
@@ -6180,7 +6178,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
u32 num_ltc = g->ops.gr.get_max_ltc_per_fbp(g) * g->gr.num_fbps;
u32 num_ltc = g->ops.top.get_max_ltc_per_fbp(g) * g->gr.num_fbps;
if (hwpm_ctxsw_buffer_size == 0U) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,

View File

@@ -42,7 +42,6 @@
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
#include <nvgpu/hw/gm20b/hw_fifo_gm20b.h>
#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
#include <nvgpu/hw/gm20b/hw_perf_gm20b.h>
void gr_gm20b_init_gpc_mmu(struct gk20a *g)
@@ -1082,10 +1081,9 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
{
u32 fbp_en_mask;
u32 tmp, max_fbps_count;
u32 max_fbps_count;
tmp = gk20a_readl(g, top_num_fbps_r());
max_fbps_count = top_num_fbps_value_v(tmp);
max_fbps_count = g->ops.top.get_max_fbps_count(g);
/*
* Read active fbp mask from fuse
@@ -1100,22 +1098,6 @@ u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
return fbp_en_mask;
}
/*
 * GR-unit getter for the max LTC-per-FBP count: reads the
 * top_ltc_per_fbp register with gk20a_readl() and returns its value field.
 */
u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g)
{
u32 ltc_per_fbp, reg;
reg = gk20a_readl(g, top_ltc_per_fbp_r());
ltc_per_fbp = top_ltc_per_fbp_value_v(reg);
return ltc_per_fbp;
}
/*
 * GR-unit getter for the max LTS-per-LTC count: reads the
 * top_slices_per_ltc register with gk20a_readl() and returns its value
 * field.
 */
u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
{
u32 lts_per_ltc, reg;
reg = gk20a_readl(g, top_slices_per_ltc_r());
lts_per_ltc = top_slices_per_ltc_value_v(reg);
return lts_per_ltc;
}
u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
{
struct gr_gk20a *gr = &g->gr;
@@ -1124,9 +1106,8 @@ u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
unsigned long fbp_en_mask;
u32 rop_l2_all_en;
tmp = gk20a_readl(g, top_num_fbps_r());
max_fbps_count = top_num_fbps_value_v(tmp);
max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp(g);
max_fbps_count = g->ops.top.get_max_fbps_count(g);
max_ltc_per_fbp = g->ops.top.get_max_ltc_per_fbp(g);
rop_l2_all_en = BIT32(max_ltc_per_fbp) - 1U;
fbp_en_mask = gr_gm20b_get_fbp_en_mask(g);
@@ -1139,14 +1120,6 @@ u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
return gr->fbp_rop_l2_en_mask;
}
/*
 * GR-unit getter for the max FBP count: reads the top_num_fbps register
 * with gk20a_readl() and returns its value field.
 */
u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
{
u32 tmp, max_fbps_count;
tmp = gk20a_readl(g, top_num_fbps_r());
max_fbps_count = top_num_fbps_value_v(tmp);
return max_fbps_count;
}
void gr_gm20b_init_cyclestats(struct gk20a *g)
{
#if defined(CONFIG_GK20A_CYCLE_STATS)

View File

@@ -101,10 +101,7 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
bool enable);
u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g);
u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g);
u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g);
u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g);
u32 gr_gm20b_get_max_fbps_count(struct gk20a *g);
void gr_gm20b_init_cyclestats(struct gk20a *g);
void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state);
void gr_gm20b_get_access_map(struct gk20a *g,

View File

@@ -266,10 +266,7 @@ static const struct gpu_ops gm20b_ops = {
.dump_gr_regs = gr_gm20b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.wait_empty = gr_gk20a_wait_idle,
.init_cyclestats = gr_gm20b_init_cyclestats,
@@ -822,6 +819,9 @@ static const struct gpu_ops gm20b_ops = {
.get_max_gpc_count = gm20b_top_get_max_gpc_count,
.get_max_tpc_per_gpc_count =
gm20b_top_get_max_tpc_per_gpc_count,
.get_max_fbps_count = gm20b_top_get_max_fbps_count,
.get_max_ltc_per_fbp = gm20b_top_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gm20b_top_get_max_lts_per_ltc,
},
.chip_init_gpu_characteristics = gk20a_init_gpu_characteristics,
.get_litter_value = gm20b_get_litter_value,

View File

@@ -288,10 +288,7 @@ static const struct gpu_ops gp10b_ops = {
.dump_gr_regs = gr_gp10b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.wait_empty = gr_gp10b_wait_empty,
.init_cyclestats = gr_gm20b_init_cyclestats,
@@ -907,6 +904,9 @@ static const struct gpu_ops gp10b_ops = {
.get_max_gpc_count = gm20b_top_get_max_gpc_count,
.get_max_tpc_per_gpc_count =
gm20b_top_get_max_tpc_per_gpc_count,
.get_max_fbps_count = gm20b_top_get_max_fbps_count,
.get_max_ltc_per_fbp = gm20b_top_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gm20b_top_get_max_lts_per_ltc,
},
.chip_init_gpu_characteristics = gp10b_init_gpu_characteristics,
.get_litter_value = gp10b_get_litter_value,

View File

@@ -391,10 +391,7 @@ static const struct gpu_ops gv100_ops = {
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
.wait_empty = gr_gv11b_wait_empty,
.init_cyclestats = gr_gm20b_init_cyclestats,
@@ -1157,6 +1154,9 @@ static const struct gpu_ops gv100_ops = {
.get_max_gpc_count = gm20b_top_get_max_gpc_count,
.get_max_tpc_per_gpc_count =
gm20b_top_get_max_tpc_per_gpc_count,
.get_max_fbps_count = gm20b_top_get_max_fbps_count,
.get_max_ltc_per_fbp = gm20b_top_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gm20b_top_get_max_lts_per_ltc,
},
.acr = {
.acr_sw_init = nvgpu_gv100_acr_sw_init,

View File

@@ -343,10 +343,7 @@ static const struct gpu_ops gv11b_ops = {
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
.wait_empty = gr_gv11b_wait_empty,
.init_cyclestats = gr_gm20b_init_cyclestats,
@@ -1038,6 +1035,9 @@ static const struct gpu_ops gv11b_ops = {
.get_max_gpc_count = gm20b_top_get_max_gpc_count,
.get_max_tpc_per_gpc_count =
gm20b_top_get_max_tpc_per_gpc_count,
.get_max_fbps_count = gm20b_top_get_max_fbps_count,
.get_max_ltc_per_fbp = gm20b_top_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gm20b_top_get_max_lts_per_ltc,
},
.chip_init_gpu_characteristics = gv11b_init_gpu_characteristics,
.get_litter_value = gv11b_get_litter_value,

View File

@@ -355,10 +355,7 @@ struct gpu_ops {
struct gk20a_debug_output *o);
int (*update_pc_sampling)(struct channel_gk20a *ch,
bool enable);
u32 (*get_max_fbps_count)(struct gk20a *g);
u32 (*get_fbp_en_mask)(struct gk20a *g);
u32 (*get_max_ltc_per_fbp)(struct gk20a *g);
u32 (*get_max_lts_per_ltc)(struct gk20a *g);
u32* (*get_rop_l2_en_mask)(struct gk20a *g);
void (*init_sm_dsm_reg_info)(void);
void (*init_ovr_sm_dsm_perf)(void);
@@ -1601,6 +1598,9 @@ struct gpu_ops {
u32 (*get_ce_inst_id)(struct gk20a *g, u32 engine_type);
u32 (*get_max_gpc_count)(struct gk20a *g);
u32 (*get_max_tpc_per_gpc_count)(struct gk20a *g);
u32 (*get_max_fbps_count)(struct gk20a *g);
u32 (*get_max_ltc_per_fbp)(struct gk20a *g);
u32 (*get_max_lts_per_ltc)(struct gk20a *g);
} top;
struct {
void (*acr_sw_init)(struct gk20a *g, struct nvgpu_acr *acr);

View File

@@ -335,10 +335,10 @@ gk20a_ctrl_ioctl_gpu_characteristics(
gpu.gpu_va_bit_count = 40;
strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname));
gpu.max_fbps_count = g->ops.gr.get_max_fbps_count(g);
gpu.max_fbps_count = g->ops.top.get_max_fbps_count(g);
gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
gpu.max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g);
gpu.max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g);
gpu.max_ltc_per_fbp = g->ops.top.get_max_ltc_per_fbp(g);
gpu.max_lts_per_ltc = g->ops.top.get_max_lts_per_ltc(g);
gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw;
gpu.gr_gobs_per_comptagline_per_slice =
g->gr.gobs_per_comptagline_per_slice;

View File

@@ -406,10 +406,7 @@ static const struct gpu_ops tu104_ops = {
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
.init_sm_dsm_reg_info = gr_tu104_init_sm_dsm_reg_info,
.wait_empty = gr_gv11b_wait_empty,
.init_cyclestats = gr_gm20b_init_cyclestats,
@@ -1205,6 +1202,9 @@ static const struct gpu_ops tu104_ops = {
.get_max_gpc_count = gm20b_top_get_max_gpc_count,
.get_max_tpc_per_gpc_count =
gm20b_top_get_max_tpc_per_gpc_count,
.get_max_fbps_count = gm20b_top_get_max_fbps_count,
.get_max_ltc_per_fbp = gm20b_top_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gm20b_top_get_max_lts_per_ltc,
},
.chip_init_gpu_characteristics = tu104_init_gpu_characteristics,
.get_litter_value = tu104_get_litter_value,