mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: add gpc_mask to gr/config unit
We get gpc_mask by calling the GR HAL g->ops.gr.get_gpc_mask(), but gpc_mask
should be logically owned by the gr/config unit.

Hence add a new gpc_mask field to nvgpu_gr_config. Initialize it in
nvgpu_gr_config_init() by calling a new HAL g->ops.gr.config.get_gpc_mask()
if available. If the HAL is not defined, initialize it based on gpc_count.

Expose a new API nvgpu_gr_config_get_gpc_mask() to read gpc_mask and use
this API now.

Remove gr_gm20b_get_gpc_mask() and the HAL g->ops.gr.get_gpc_mask().
Update the GV100 and TU104 chip HALs to remove the old HAL and add the new one.

Add gpc_mask to struct tegra_vgpu_constants_params to support this on vGPU.
Also get gpc_mask from vGPU private data in vgpu_gr_init_gr_config().

Jira NVGPU-1879

Change-Id: Ibdc89ea51df944dc7085920509e3536a5721efc0
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2016084
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit 00aeab6cca
parent 6fb2abb153
committed by mobile promotions
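As orientation before the diff, here is the net effect of the change condensed
into one hedged sketch (not a verbatim copy of any single hunk below; the local
variable gpc_mask in the last line is illustrative only):

	/* In nvgpu_gr_config_init(): the gr/config unit now owns gpc_mask. */
	if (g->ops.gr.config.get_gpc_mask != NULL) {
		/* Chip HAL, e.g. gm20b_gr_config_get_gpc_mask(), derives it from fuses. */
		config->gpc_mask = g->ops.gr.config.get_gpc_mask(g, config);
	} else {
		/* No HAL: treat all gpc_count GPCs as enabled. */
		config->gpc_mask = BIT32(config->gpc_count) - 1;
	}

	/* Callers read the mask through the new accessor instead of the
	 * removed g->ops.gr.get_gpc_mask() HAL, e.g.: */
	u32 gpc_mask = nvgpu_gr_config_get_gpc_mask(g->gr.config);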
@@ -51,6 +51,12 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
 		goto clean_up;
 	}
 
+	if (g->ops.gr.config.get_gpc_mask != NULL) {
+		config->gpc_mask = g->ops.gr.config.get_gpc_mask(g, config);
+	} else {
+		config->gpc_mask = BIT32(config->gpc_count) - 1;
+	}
+
 	config->pe_count_per_gpc = nvgpu_get_litter_value(g,
 					GPU_LIT_NUM_PES_PER_GPC);
 	if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) {
@@ -531,3 +537,8 @@ u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
 {
 	return config->pes_tpc_mask[pes_index][gpc_index];
 }
+
+u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config)
+{
+	return config->gpc_mask;
+}
@@ -77,3 +77,20 @@ u32 gm20b_gr_config_get_pd_dist_skip_table_size(void)
 {
 	return gr_pd_dist_skip_table__size_1_v();
 }
+
+u32 gm20b_gr_config_get_gpc_mask(struct gk20a *g,
+	struct nvgpu_gr_config *config)
+{
+	u32 val;
+
+	/*
+	 * For register NV_FUSE_STATUS_OPT_GPC a set bit with index i indicates
+	 * corresponding GPC is floorswept
+	 * But for s/w mask a set bit means GPC is enabled and it is disabled
+	 * otherwise
+	 * Hence toggle the bits of register value to get s/w mask
+	 */
+	val = g->ops.fuse.fuse_status_opt_gpc(g);
+
+	return (~val) & (BIT32(config->max_gpc_count) - 1U);
+}
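For illustration only, a worked example of the inversion above with hypothetical
values (a 4-GPC chip with GPC1 floorswept); none of this is part of the change
itself:

	u32 val = 0x2U;           /* NV_FUSE_STATUS_OPT_GPC: bit 1 set -> GPC1 floorswept */
	u32 max_gpc_count = 4U;   /* hypothetical chip with 4 GPCs */
	u32 gpc_mask = (~val) & (BIT32(max_gpc_count) - 1U);
	/* (~0x2U) & 0xFU == 0xFFFFFFFDU & 0xFU == 0xDU -> GPCs 0, 2 and 3 enabled */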
@@ -37,5 +37,7 @@ u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g,
 u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g,
 	struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index);
 u32 gm20b_gr_config_get_pd_dist_skip_table_size(void);
+u32 gm20b_gr_config_get_gpc_mask(struct gk20a *g,
+	struct nvgpu_gr_config *config);
 
 #endif /* NVGPU_GR_CONFIG_GM20B_H */
@@ -558,23 +558,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 		g->ops.gr.ctxsw_prog.hw_get_perf_counter_control_register_stride();
 }
 
-u32 gr_gm20b_get_gpc_mask(struct gk20a *g)
-{
-	u32 val;
-	struct gr_gk20a *gr = &g->gr;
-
-	/*
-	 * For register NV_FUSE_STATUS_OPT_GPC a set bit with index i indicates
-	 * corresponding GPC is floorswept
-	 * But for s/w mask a set bit means GPC is enabled and it is disabled
-	 * otherwise
-	 * Hence toggle the bits of register value to get s/w mask
-	 */
-	val = g->ops.fuse.fuse_status_opt_gpc(g);
-
-	return (~val) & (BIT32(nvgpu_gr_config_get_max_gpc_count(gr->config)) - 1U);
-}
-
 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 {
 	nvgpu_tegra_fuse_write_bypass(g, 0x1);
@@ -80,7 +80,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 		u32 **sm_dsm_perf_ctrl_regs,
 		u32 *ctrl_register_stride);
 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
-u32 gr_gm20b_get_gpc_mask(struct gk20a *g);
 void gr_gm20b_load_tpc_mask(struct gk20a *g);
 void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 smid);
@@ -367,7 +367,6 @@ static const struct gpu_ops gv100_ops = {
 		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
 		.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
 		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
-		.get_gpc_mask = gr_gm20b_get_gpc_mask,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
 		.get_zcull_info = gr_gk20a_get_zcull_info,
@@ -585,6 +584,7 @@ static const struct gpu_ops gv100_ops = {
 			.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
 		},
 		.config = {
+			.get_gpc_mask = gm20b_gr_config_get_gpc_mask,
 			.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
 			.get_tpc_count_in_gpc =
 				gm20b_gr_config_get_tpc_count_in_gpc,
@@ -292,7 +292,6 @@ struct gpu_ops {
 				struct gk20a_ctxsw_ucode_segments *segments,
 				u32 reg_offset);
 		int (*load_ctxsw_ucode)(struct gk20a *g);
-		u32 (*get_gpc_mask)(struct gk20a *g);
 		void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
 		int (*alloc_obj_ctx)(struct channel_gk20a *c,
 				u32 class_num, u32 flags);
@@ -619,6 +618,8 @@ struct gpu_ops {
 		} ctxsw_prog;
 
 		struct {
+			u32 (*get_gpc_mask)(struct gk20a *g,
+				struct nvgpu_gr_config *config);
 			u32 (*get_gpc_tpc_mask)(struct gk20a *g,
 				struct nvgpu_gr_config *config, u32 gpc_index);
 			u32 (*get_tpc_count_in_gpc)(struct gk20a *g,
@@ -47,6 +47,7 @@ struct nvgpu_gr_config {
 	u32 *gpc_zcb_count;
 	u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
 
+	u32 gpc_mask;
 	u32 *gpc_tpc_mask;
 	u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
 	u32 *gpc_skip_mask;
@@ -92,5 +93,6 @@ u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config,
 		u32 gpc_index);
 u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
 	u32 gpc_index, u32 pes_index);
+u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config);
 
 #endif /* NVGPU_GR_CONFIG_H */
@@ -501,6 +501,7 @@ struct tegra_vgpu_constants_params {
 	u32 sm_arch_warp_count;
 	u32 max_gpc_count;
 	u32 gpc_count;
+	u32 gpc_mask;
 	u32 max_tpc_per_gpc_count;
 	u32 num_fbps;
 	u32 fbp_en_mask;
@@ -284,6 +284,7 @@ gk20a_ctrl_ioctl_gpu_characteristics(
 
 	gpu.num_gpc = nvgpu_gr_config_get_gpc_count(g->gr.config);
 	gpu.max_gpc_count = nvgpu_gr_config_get_max_gpc_count(g->gr.config);
+	gpu.gpc_mask = nvgpu_gr_config_get_gpc_mask(g->gr.config);
 
 	gpu.num_tpc_per_gpc = nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config);
 
@@ -291,12 +292,6 @@ gk20a_ctrl_ioctl_gpu_characteristics(
 
 	gpu.compression_page_size = g->ops.fb.compression_page_size(g);
 
-	if (g->ops.gr.get_gpc_mask) {
-		gpu.gpc_mask = g->ops.gr.get_gpc_mask(g);
-	} else {
-		gpu.gpc_mask = BIT32(gpu.num_gpc) - 1;
-	}
-
 	gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g);
 
 	gpu.arch = g->params.gpu_arch;
@@ -384,7 +384,6 @@ static const struct gpu_ops tu104_ops = {
 		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
 		.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
 		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
-		.get_gpc_mask = gr_gm20b_get_gpc_mask,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
 		.get_zcull_info = gr_gk20a_get_zcull_info,
@@ -610,6 +609,7 @@ static const struct gpu_ops tu104_ops = {
 			.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
 		},
 		.config = {
+			.get_gpc_mask = gm20b_gr_config_get_gpc_mask,
 			.get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
 			.get_tpc_count_in_gpc =
 				gm20b_gr_config_get_tpc_count_in_gpc,
@@ -600,6 +600,7 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 
 	config->max_gpc_count = priv->constants.max_gpc_count;
 	config->gpc_count = priv->constants.gpc_count;
+	config->gpc_mask = priv->constants.gpc_mask;
 	config->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count;
 
 	config->max_tpc_count = config->max_gpc_count * config->max_tpc_per_gpc_count;