gpu: nvgpu: add gpc_mask to gr/config unit

We get gpc_mask by calling GR HAL g->ops.gr.get_gpc_mask()

But gpc_mask should be logically owned by gr/config unit
Hence add new gpc_mask field to nvgpu_gr_config

Initialize it in nvgpu_gr_config_init() by calling the new HAL
g->ops.gr.config.get_gpc_mask(), if available.
If the HAL is not defined, initialize it based on gpc_count instead.

Expose a new API, nvgpu_gr_config_get_gpc_mask(), to get gpc_mask,
and switch all callers to use this API.

Remove gr_gm20b_get_gpc_mask() and HAL g->ops.gr.get_gpc_mask()

Update GV100 and TU104 chip HALs to remove old and add new HAL

Add gpc_mask to struct tegra_vgpu_constants_params to support this
on vGPU. Also get gpc_mask from vGPU private data in
vgpu_gr_init_gr_config()

Jira NVGPU-1879

Change-Id: Ibdc89ea51df944dc7085920509e3536a5721efc0
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2016084
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2019-02-07 19:39:11 +05:30
committed by mobile promotions
parent 6fb2abb153
commit 00aeab6cca
12 changed files with 39 additions and 27 deletions

View File

@@ -51,6 +51,12 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
goto clean_up; goto clean_up;
} }
if (g->ops.gr.config.get_gpc_mask != NULL) {
config->gpc_mask = g->ops.gr.config.get_gpc_mask(g, config);
} else {
config->gpc_mask = BIT32(config->gpc_count) - 1;
}
config->pe_count_per_gpc = nvgpu_get_litter_value(g, config->pe_count_per_gpc = nvgpu_get_litter_value(g,
GPU_LIT_NUM_PES_PER_GPC); GPU_LIT_NUM_PES_PER_GPC);
if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) { if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) {
@@ -531,3 +537,8 @@ u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
{ {
return config->pes_tpc_mask[pes_index][gpc_index]; return config->pes_tpc_mask[pes_index][gpc_index];
} }
/*
 * Return the mask of enabled GPCs cached in the gr config unit.
 * A set bit means the corresponding GPC is present/enabled.
 */
u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config)
{
	u32 enabled_gpcs = config->gpc_mask;

	return enabled_gpcs;
}

View File

@@ -77,3 +77,20 @@ u32 gm20b_gr_config_get_pd_dist_skip_table_size(void)
{ {
return gr_pd_dist_skip_table__size_1_v(); return gr_pd_dist_skip_table__size_1_v();
} }
/*
 * Derive the s/w GPC enable mask from the floorsweeping fuse.
 *
 * NV_FUSE_STATUS_OPT_GPC reports floorswept GPCs as set bits, whereas
 * the s/w mask uses a set bit to mean the GPC is enabled. Invert the
 * fuse value and keep only the bits covering max_gpc_count GPCs.
 */
u32 gm20b_gr_config_get_gpc_mask(struct gk20a *g,
	struct nvgpu_gr_config *config)
{
	u32 floorswept = g->ops.fuse.fuse_status_opt_gpc(g);
	u32 valid_bits = BIT32(config->max_gpc_count) - 1U;

	return ~floorswept & valid_bits;
}

View File

@@ -37,5 +37,7 @@ u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g,
u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g, u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index); struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index);
u32 gm20b_gr_config_get_pd_dist_skip_table_size(void); u32 gm20b_gr_config_get_pd_dist_skip_table_size(void);
u32 gm20b_gr_config_get_gpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config);
#endif /* NVGPU_GR_CONFIG_GM20B_H */ #endif /* NVGPU_GR_CONFIG_GM20B_H */

View File

@@ -558,23 +558,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
g->ops.gr.ctxsw_prog.hw_get_perf_counter_control_register_stride(); g->ops.gr.ctxsw_prog.hw_get_perf_counter_control_register_stride();
} }
/*
 * Build the s/w GPC enable mask from the floorsweeping fuse register.
 * The fuse flags floorswept GPCs with set bits; the s/w convention is
 * the opposite (set bit == enabled), so the fuse value is inverted and
 * clamped to the max GPC count reported by the gr config.
 */
u32 gr_gm20b_get_gpc_mask(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	u32 enabled_bits =
		BIT32(nvgpu_gr_config_get_max_gpc_count(gr->config)) - 1U;
	u32 fuse_val = g->ops.fuse.fuse_status_opt_gpc(g);

	return ~fuse_val & enabled_bits;
}
void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
{ {
nvgpu_tegra_fuse_write_bypass(g, 0x1); nvgpu_tegra_fuse_write_bypass(g, 0x1);

View File

@@ -80,7 +80,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
u32 **sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs,
u32 *ctrl_register_stride); u32 *ctrl_register_stride);
void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
u32 gr_gm20b_get_gpc_mask(struct gk20a *g);
void gr_gm20b_load_tpc_mask(struct gk20a *g); void gr_gm20b_load_tpc_mask(struct gk20a *g);
void gr_gm20b_program_sm_id_numbering(struct gk20a *g, void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
u32 gpc, u32 tpc, u32 smid); u32 gpc, u32 tpc, u32 smid);

View File

@@ -367,7 +367,6 @@ static const struct gpu_ops gv100_ops = {
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.get_gpc_mask = gr_gm20b_get_gpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info, .get_zcull_info = gr_gk20a_get_zcull_info,
@@ -585,6 +584,7 @@ static const struct gpu_ops gv100_ops = {
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
}, },
.config = { .config = {
.get_gpc_mask = gm20b_gr_config_get_gpc_mask,
.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask, .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
.get_tpc_count_in_gpc = .get_tpc_count_in_gpc =
gm20b_gr_config_get_tpc_count_in_gpc, gm20b_gr_config_get_tpc_count_in_gpc,

View File

@@ -292,7 +292,6 @@ struct gpu_ops {
struct gk20a_ctxsw_ucode_segments *segments, struct gk20a_ctxsw_ucode_segments *segments,
u32 reg_offset); u32 reg_offset);
int (*load_ctxsw_ucode)(struct gk20a *g); int (*load_ctxsw_ucode)(struct gk20a *g);
u32 (*get_gpc_mask)(struct gk20a *g);
void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
int (*alloc_obj_ctx)(struct channel_gk20a *c, int (*alloc_obj_ctx)(struct channel_gk20a *c,
u32 class_num, u32 flags); u32 class_num, u32 flags);
@@ -619,6 +618,8 @@ struct gpu_ops {
} ctxsw_prog; } ctxsw_prog;
struct { struct {
u32 (*get_gpc_mask)(struct gk20a *g,
struct nvgpu_gr_config *config);
u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 (*get_gpc_tpc_mask)(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index); struct nvgpu_gr_config *config, u32 gpc_index);
u32 (*get_tpc_count_in_gpc)(struct gk20a *g, u32 (*get_tpc_count_in_gpc)(struct gk20a *g,

View File

@@ -47,6 +47,7 @@ struct nvgpu_gr_config {
u32 *gpc_zcb_count; u32 *gpc_zcb_count;
u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC]; u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
u32 gpc_mask;
u32 *gpc_tpc_mask; u32 *gpc_tpc_mask;
u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC]; u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
u32 *gpc_skip_mask; u32 *gpc_skip_mask;
@@ -92,5 +93,6 @@ u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config,
u32 gpc_index); u32 gpc_index);
u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config, u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index, u32 pes_index); u32 gpc_index, u32 pes_index);
u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config);
#endif /* NVGPU_GR_CONFIG_H */ #endif /* NVGPU_GR_CONFIG_H */

View File

@@ -501,6 +501,7 @@ struct tegra_vgpu_constants_params {
u32 sm_arch_warp_count; u32 sm_arch_warp_count;
u32 max_gpc_count; u32 max_gpc_count;
u32 gpc_count; u32 gpc_count;
u32 gpc_mask;
u32 max_tpc_per_gpc_count; u32 max_tpc_per_gpc_count;
u32 num_fbps; u32 num_fbps;
u32 fbp_en_mask; u32 fbp_en_mask;

View File

@@ -284,6 +284,7 @@ gk20a_ctrl_ioctl_gpu_characteristics(
gpu.num_gpc = nvgpu_gr_config_get_gpc_count(g->gr.config); gpu.num_gpc = nvgpu_gr_config_get_gpc_count(g->gr.config);
gpu.max_gpc_count = nvgpu_gr_config_get_max_gpc_count(g->gr.config); gpu.max_gpc_count = nvgpu_gr_config_get_max_gpc_count(g->gr.config);
gpu.gpc_mask = nvgpu_gr_config_get_gpc_mask(g->gr.config);
gpu.num_tpc_per_gpc = nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config); gpu.num_tpc_per_gpc = nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config);
@@ -291,12 +292,6 @@ gk20a_ctrl_ioctl_gpu_characteristics(
gpu.compression_page_size = g->ops.fb.compression_page_size(g); gpu.compression_page_size = g->ops.fb.compression_page_size(g);
if (g->ops.gr.get_gpc_mask) {
gpu.gpc_mask = g->ops.gr.get_gpc_mask(g);
} else {
gpu.gpc_mask = BIT32(gpu.num_gpc) - 1;
}
gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g); gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g);
gpu.arch = g->params.gpu_arch; gpu.arch = g->params.gpu_arch;

View File

@@ -384,7 +384,6 @@ static const struct gpu_ops tu104_ops = {
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.get_gpc_mask = gr_gm20b_get_gpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info, .get_zcull_info = gr_gk20a_get_zcull_info,
@@ -610,6 +609,7 @@ static const struct gpu_ops tu104_ops = {
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
}, },
.config = { .config = {
.get_gpc_mask = gm20b_gr_config_get_gpc_mask,
.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask, .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
.get_tpc_count_in_gpc = .get_tpc_count_in_gpc =
gm20b_gr_config_get_tpc_count_in_gpc, gm20b_gr_config_get_tpc_count_in_gpc,

View File

@@ -600,6 +600,7 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
config->max_gpc_count = priv->constants.max_gpc_count; config->max_gpc_count = priv->constants.max_gpc_count;
config->gpc_count = priv->constants.gpc_count; config->gpc_count = priv->constants.gpc_count;
config->gpc_mask = priv->constants.gpc_mask;
config->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count; config->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count;
config->max_tpc_count = config->max_gpc_count * config->max_tpc_per_gpc_count; config->max_tpc_count = config->max_gpc_count * config->max_tpc_per_gpc_count;