gpu: nvgpu: add hal.gr.init hal to detect SM arch

Add new hal g->ops.gr.init.detect_sm_arch() in hal.gr.init unit to get
SM arch information. Remove g->ops.gr.detect_sm_arch().

Move the corresponding functions to the hal.gr.init unit.

Remove the unused function declaration for gr_gv11b_init_sw_veid_bundle().

Jira NVGPU-2961

Change-Id: Idfd5ce19c06978dc31cbcec2cd01cb2912eb3cf9
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2097534
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2019-04-08 19:30:12 +05:30
committed by mobile promotions
parent efae66471c
commit 2adcb51a45
17 changed files with 36 additions and 36 deletions

View File

@@ -547,7 +547,7 @@ void gk20a_init_gpu_characteristics(struct gk20a *g)
}
}
g->ops.gr.detect_sm_arch(g);
g->ops.gr.init.detect_sm_arch(g);
if (g->ops.gr.init_cyclestats != NULL) {
g->ops.gr.init_cyclestats(g);

View File

@@ -134,7 +134,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.set_gpc_tpc_mask = NULL,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = vgpu_gr_detect_sm_arch,
.dump_gr_regs = NULL,
.update_pc_sampling = vgpu_gr_update_pc_sampling,
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
@@ -357,6 +356,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.commit_ctxsw_spill = gp10b_gr_init_commit_ctxsw_spill,
.commit_cbes_reserve =
gp10b_gr_init_commit_cbes_reserve,
.detect_sm_arch = vgpu_gr_detect_sm_arch,
},
},
.class = {

View File

@@ -158,7 +158,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.set_gpc_tpc_mask = NULL,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = vgpu_gr_detect_sm_arch,
.dump_gr_regs = NULL,
.update_pc_sampling = vgpu_gr_update_pc_sampling,
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
@@ -409,6 +408,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
gv11b_gr_init_commit_cbes_reserve,
.gfxp_wfi_timeout =
gv11b_gr_init_commit_gfxp_wfi_timeout,
.detect_sm_arch = vgpu_gr_detect_sm_arch,
},
.intr = {
.handle_gcc_exception =

View File

@@ -341,18 +341,6 @@ u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
return 0;
}
/*
 * Record the SM architecture parameters in g->params.
 *
 * Issues one gk20a_readl() at gr_gpc0_tpc0_sm_arch_r() and stores the SPA
 * version, SM version and warp count fields extracted from that value.
 */
void gr_gm20b_detect_sm_arch(struct gk20a *g)
{
	const u32 sm_arch = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());

	/* Each field is decoded from the same raw value by its _v() helper. */
	g->params.sm_arch_spa_version =
		gr_gpc0_tpc0_sm_arch_spa_version_v(sm_arch);
	g->params.sm_arch_sm_version =
		gr_gpc0_tpc0_sm_arch_sm_version_v(sm_arch);
	g->params.sm_arch_warp_count =
		gr_gpc0_tpc0_sm_arch_warp_count_v(sm_arch);
}
int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o)
{

View File

@@ -61,7 +61,6 @@ void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
void gr_gm20b_detect_sm_arch(struct gk20a *g);
int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o);
int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,

View File

@@ -264,7 +264,6 @@ static const struct gpu_ops gm20b_ops = {
.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gm20b_detect_sm_arch,
.dump_gr_regs = gr_gm20b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -469,6 +468,7 @@ static const struct gpu_ops gm20b_ops = {
gm20b_gr_init_load_sw_bundle_init,
.get_gfxp_rtv_cb_size = NULL,
.get_patch_slots = gm20b_gr_init_get_patch_slots,
.detect_sm_arch = gm20b_gr_init_detect_sm_arch,
},
.intr = {
.set_shader_exceptions =

View File

@@ -298,7 +298,6 @@ static const struct gpu_ops gp10b_ops = {
.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gm20b_detect_sm_arch,
.dump_gr_regs = gr_gp10b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -564,6 +563,7 @@ static const struct gpu_ops gp10b_ops = {
.commit_cbes_reserve =
gp10b_gr_init_commit_cbes_reserve,
.get_patch_slots = gm20b_gr_init_get_patch_slots,
.detect_sm_arch = gm20b_gr_init_detect_sm_arch,
},
.intr = {
.set_shader_exceptions =

View File

@@ -401,7 +401,6 @@ static const struct gpu_ops gv100_ops = {
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gv11b_detect_sm_arch,
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -696,6 +695,7 @@ static const struct gpu_ops gv100_ops = {
.get_max_subctx_count =
gv11b_gr_init_get_max_subctx_count,
.get_patch_slots = gv11b_gr_init_get_patch_slots,
.detect_sm_arch = gv11b_gr_init_detect_sm_arch,
},
.intr = {
.set_shader_exceptions =

View File

@@ -1619,18 +1619,6 @@ int gr_gv11b_handle_fecs_error(struct gk20a *g,
return ret;
}
/*
 * Record the SM architecture parameters in g->params.
 *
 * Issues one gk20a_readl() at gr_gpc0_tpc0_sm_arch_r() and stores the SPA
 * version, SM version and warp count fields extracted from that value.
 */
void gr_gv11b_detect_sm_arch(struct gk20a *g)
{
	const u32 sm_arch = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());

	/* Each field is decoded from the same raw value by its _v() helper. */
	g->params.sm_arch_spa_version =
		gr_gpc0_tpc0_sm_arch_spa_version_v(sm_arch);
	g->params.sm_arch_sm_version =
		gr_gpc0_tpc0_sm_arch_sm_version_v(sm_arch);
	g->params.sm_arch_warp_count =
		gr_gpc0_tpc0_sm_arch_warp_count_v(sm_arch);
}
void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
u32 *esr_sm_sel)
{

View File

@@ -89,8 +89,6 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
int gr_gv11b_handle_fecs_error(struct gk20a *g,
struct channel_gk20a *__ch,
struct nvgpu_gr_isr_data *isr_data);
int gr_gv11b_init_sw_veid_bundle(struct gk20a *g);
void gr_gv11b_detect_sm_arch(struct gk20a *g);
void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
u32 *esr_sm_sel);
int gv11b_gr_sm_trigger_suspend(struct gk20a *g);

View File

@@ -366,7 +366,6 @@ static const struct gpu_ops gv11b_ops = {
.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gv11b_detect_sm_arch,
.powergate_tpc = gr_gv11b_powergate_tpc,
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
@@ -672,6 +671,7 @@ static const struct gpu_ops gv11b_ops = {
.get_max_subctx_count =
gv11b_gr_init_get_max_subctx_count,
.get_patch_slots = gv11b_gr_init_get_patch_slots,
.detect_sm_arch = gv11b_gr_init_detect_sm_arch,
},
.intr = {
.set_shader_exceptions =

View File

@@ -1097,3 +1097,16 @@ u32 gm20b_gr_init_get_patch_slots(struct gk20a *g,
{
return PATCH_CTX_SLOTS_PER_PAGE;
}
/*
 * hal.gr.init: populate the SM architecture parameters in g->params.
 *
 * Performs a single gk20a_readl() at gr_gpc0_tpc0_sm_arch_r() and stores the
 * SPA version, SM version and warp count fields extracted from that value.
 */
void gm20b_gr_init_detect_sm_arch(struct gk20a *g)
{
	const u32 sm_arch = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());

	/* Each field is decoded from the same raw value by its _v() helper. */
	g->params.sm_arch_spa_version =
		gr_gpc0_tpc0_sm_arch_spa_version_v(sm_arch);
	g->params.sm_arch_sm_version =
		gr_gpc0_tpc0_sm_arch_sm_version_v(sm_arch);
	g->params.sm_arch_warp_count =
		gr_gpc0_tpc0_sm_arch_warp_count_v(sm_arch);
}

View File

@@ -97,5 +97,6 @@ void gm20b_gr_init_commit_global_cb_manager(struct gk20a *g,
u32 gm20b_gr_init_get_patch_slots(struct gk20a *g,
struct nvgpu_gr_config *config);
/*
 * Read the SM arch value and record the SPA version, SM version and warp
 * count in g->params.
 */
void gm20b_gr_init_detect_sm_arch(struct gk20a *g);
#endif /* NVGPU_GR_INIT_GM20B_H */

View File

@@ -948,3 +948,15 @@ u32 gv11b_gr_init_get_patch_slots(struct gk20a *g,
return size;
}
/*
 * hal.gr.init: populate the SM architecture parameters in g->params.
 *
 * Performs a single gk20a_readl() at gr_gpc0_tpc0_sm_arch_r() and stores the
 * SPA version, SM version and warp count fields extracted from that value.
 */
void gv11b_gr_init_detect_sm_arch(struct gk20a *g)
{
	const u32 sm_arch = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());

	/* Each field is decoded from the same raw value by its _v() helper. */
	g->params.sm_arch_spa_version =
		gr_gpc0_tpc0_sm_arch_spa_version_v(sm_arch);
	g->params.sm_arch_sm_version =
		gr_gpc0_tpc0_sm_arch_sm_version_v(sm_arch);
	g->params.sm_arch_warp_count =
		gr_gpc0_tpc0_sm_arch_warp_count_v(sm_arch);
}

View File

@@ -81,5 +81,6 @@ void gv11b_gr_init_commit_gfxp_wfi_timeout(struct gk20a *g,
u32 gv11b_gr_init_get_max_subctx_count(void);
u32 gv11b_gr_init_get_patch_slots(struct gk20a *g,
struct nvgpu_gr_config *config);
/*
 * Read the SM arch value and record the SPA version, SM version and warp
 * count in g->params.
 */
void gv11b_gr_init_detect_sm_arch(struct gk20a *g);
#endif /* NVGPU_GR_INIT_GV11B_H */

View File

@@ -304,7 +304,6 @@ struct gpu_ops {
u32 *gpc_num, u32 *tpc_num);
u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
u32 (*get_egpc_base)(struct gk20a *g);
void (*detect_sm_arch)(struct gk20a *g);
void (*powergate_tpc)(struct gk20a *g);
int (*init_ctxsw_preemption_mode)(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
@@ -789,6 +788,7 @@ struct gpu_ops {
u32 (*get_max_subctx_count)(void);
u32 (*get_patch_slots)(struct gk20a *g,
struct nvgpu_gr_config *config);
void (*detect_sm_arch)(struct gk20a *g);
} init;
struct {

View File

@@ -419,7 +419,6 @@ static const struct gpu_ops tu104_ops = {
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gv11b_detect_sm_arch,
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -729,6 +728,7 @@ static const struct gpu_ops tu104_ops = {
.get_max_subctx_count =
gv11b_gr_init_get_max_subctx_count,
.get_patch_slots = gv11b_gr_init_get_patch_slots,
.detect_sm_arch = gv11b_gr_init_detect_sm_arch,
},
.intr = {
.set_shader_exceptions =