diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
index 2bf480c48..1c78a6d8f 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
@@ -179,15 +179,21 @@ void gm20b_gr_init_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid,
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
 					GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 gpc_offset = gpc_stride * gpc;
-	u32 tpc_offset = tpc_in_gpc_stride * tpc;
+	u32 gpc_offset = nvgpu_safe_mult_u32(gpc_stride, gpc);
+	u32 tpc_offset = nvgpu_safe_mult_u32(tpc_in_gpc_stride, tpc);
+	u32 offset_sum = nvgpu_safe_add_u32(gpc_offset, tpc_offset);
 
-	nvgpu_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
-		gr_gpc0_tpc0_sm_cfg_sm_id_f(smid));
-	nvgpu_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
-		gr_gpc0_gpm_pd_sm_id_id_f(smid));
-	nvgpu_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
-		gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
+	nvgpu_writel(g,
+		nvgpu_safe_add_u32(gr_gpc0_tpc0_sm_cfg_r(), offset_sum),
+		gr_gpc0_tpc0_sm_cfg_sm_id_f(smid));
+	nvgpu_writel(g,
+		nvgpu_safe_add_u32(gr_gpc0_gpm_pd_sm_id_r(tpc),
+			gpc_offset),
+		gr_gpc0_gpm_pd_sm_id_id_f(smid));
+	nvgpu_writel(g,
+		nvgpu_safe_add_u32(gr_gpc0_tpc0_pe_cfg_smid_r(),
+			offset_sum),
+		gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
 }
 
 u32 gm20b_gr_init_get_sm_id_size(void)
@@ -200,17 +206,21 @@ int gm20b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 {
 	u32 i, j;
 	u32 tpc_index, gpc_index;
+	u32 tpc_cnt = nvgpu_safe_sub_u32(
+			nvgpu_gr_config_get_tpc_count(gr_config), 1U);
 
 	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
 	for (i = 0U;
-	     i <= ((nvgpu_gr_config_get_tpc_count(gr_config) - 1U) / 4U);
+	     i <= (tpc_cnt / 4U);
	     i++) {
 		u32 reg = 0;
-		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
-			gr_cwd_gpc_tpc_id_tpc0_s();
+		u32 bit_stride = nvgpu_safe_add_u32(
+				gr_cwd_gpc_tpc_id_gpc0_s(),
+				gr_cwd_gpc_tpc_id_tpc0_s());
 
 		for (j = 0U; j < 4U; j++) {
-			u32 sm_id = (i * 4U) + j;
+			u32 sm_id = nvgpu_safe_add_u32(
+					nvgpu_safe_mult_u32(i, 4U), j);
 			u32 bits;
 			struct nvgpu_sm_info *sm_info;
 
@@ -227,10 +237,11 @@ int gm20b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
 				gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
-			reg |= bits << (j * bit_stride);
+			reg |= bits << nvgpu_safe_mult_u32(j, bit_stride);
 
 			tpc_sm_id[gpc_index] |=
-				(sm_id << tpc_index * bit_stride);
+				(sm_id <<
+				 nvgpu_safe_mult_u32(tpc_index, bit_stride));
 		}
 		nvgpu_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
 	}
@@ -555,7 +566,7 @@ int gm20b_gr_init_wait_idle(struct gk20a *g)
 			return 0;
 		}
 
-		nvgpu_usleep_range(delay, delay * 2U);
+		nvgpu_usleep_range(delay, nvgpu_safe_mult_u32(delay, 2U));
 		delay = min_t(u32, delay << 1, POLL_DELAY_MAX_US);
 	} while (nvgpu_timeout_expired(&timeout) == 0);
 
@@ -593,7 +604,7 @@ int gm20b_gr_init_wait_fe_idle(struct gk20a *g)
 			return 0;
 		}
 
-		nvgpu_usleep_range(delay, delay * 2U);
+		nvgpu_usleep_range(delay, nvgpu_safe_mult_u32(delay, 2U));
 		delay = min_t(u32, delay << 1, POLL_DELAY_MAX_US);
 	} while (nvgpu_timeout_expired(&timeout) == 0);
 
@@ -831,14 +842,16 @@ u32 gm20b_gr_init_get_alpha_cb_default_size(struct gk20a *g)
 
 u32 gm20b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count)
 {
-	return g->ops.gr.init.get_attrib_cb_default_size(g)
-		+ (g->ops.gr.init.get_attrib_cb_default_size(g) >> 1);
+	return nvgpu_safe_add_u32(
+		g->ops.gr.init.get_attrib_cb_default_size(g),
+		(g->ops.gr.init.get_attrib_cb_default_size(g) >> 1));
 }
 
 u32 gm20b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count)
 {
-	return g->ops.gr.init.get_alpha_cb_default_size(g)
-		+ (g->ops.gr.init.get_alpha_cb_default_size(g) >> 1);
+	return nvgpu_safe_add_u32(
+		g->ops.gr.init.get_alpha_cb_default_size(g),
+		(g->ops.gr.init.get_alpha_cb_default_size(g) >> 1));
 }
 
 u32 gm20b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
@@ -846,25 +859,33 @@ u32 gm20b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
 {
 	u32 size;
 
-	size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) *
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
+	size = nvgpu_safe_mult_u32(
+		g->ops.gr.init.get_attrib_cb_size(g, tpc_count),
+		nvgpu_safe_mult_u32(
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(),
+			max_tpc));
 
-	size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) *
-		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc;
+	size += nvgpu_safe_mult_u32(
+		g->ops.gr.init.get_alpha_cb_size(g, tpc_count),
+		nvgpu_safe_mult_u32(
+			gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v(),
+			max_tpc));
 
 	return size;
 }
 
 u32 gm20b_gr_init_get_global_ctx_cb_buffer_size(struct gk20a *g)
 {
-	return g->ops.gr.init.get_bundle_cb_default_size(g) *
-		gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
+	return nvgpu_safe_mult_u32(
+		g->ops.gr.init.get_bundle_cb_default_size(g),
+		gr_scc_bundle_cb_size_div_256b_byte_granularity_v());
 }
 
 u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g)
 {
-	return g->ops.gr.init.pagepool_default_size(g) *
-		gr_scc_pagepool_total_pages_byte_granularity_v();
+	return nvgpu_safe_mult_u32(
+		g->ops.gr.init.pagepool_default_size(g),
+		gr_scc_pagepool_total_pages_byte_granularity_v());
 }
 
 void gm20b_gr_init_commit_global_bundle_cb(struct gk20a *g,
@@ -896,7 +917,8 @@ void gm20b_gr_init_commit_global_bundle_cb(struct gk20a *g,
 		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
 
 	/* data for state_limit */
-	data = (g->ops.gr.init.get_bundle_cb_default_size(g) *
+	data = nvgpu_safe_mult_u32(
+		g->ops.gr.init.get_bundle_cb_default_size(g),
 		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
 		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
 
@@ -937,7 +959,7 @@ void gm20b_gr_init_commit_global_pagepool(struct gk20a *g,
 		addr, size);
 
 	pp_addr = (u32)addr;
-	pp_size = (u32)size;
+	pp_size = nvgpu_safe_cast_u64_to_u32(size);
 	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
 		gr_scc_pagepool_base_addr_39_8_f(pp_addr), patch);
 
@@ -1011,7 +1033,7 @@ void gm20b_gr_init_commit_global_cb_manager(struct gk20a *g,
 		gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_default_size),
 		patch);
 
-	pd_ab_max_output = (alpha_cb_default_size *
+	pd_ab_max_output = nvgpu_safe_mult_u32(alpha_cb_default_size,
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
 		gr_pd_ab_dist_cfg1_max_output_granularity_v();
 
@@ -1019,55 +1041,70 @@ void gm20b_gr_init_commit_global_cb_manager(struct gk20a *g,
 		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
 		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
 
-	alpha_offset_in_chunk = attrib_offset_in_chunk +
-		nvgpu_gr_config_get_tpc_count(config) * attrib_cb_size;
+	alpha_offset_in_chunk =
+		nvgpu_safe_add_u32(attrib_offset_in_chunk,
+			nvgpu_safe_mult_u32(
+				nvgpu_gr_config_get_tpc_count(config),
+				attrib_cb_size));
 
 	for (gpc_index = 0;
	     gpc_index < nvgpu_gr_config_get_gpc_count(config);
	     gpc_index++) {
-		u32 temp = gpc_stride * gpc_index;
-		u32 temp2 = num_pes_per_gpc * gpc_index;
+		u32 temp = nvgpu_safe_mult_u32(gpc_stride, gpc_index);
+		u32 temp2 = nvgpu_safe_mult_u32(num_pes_per_gpc, gpc_index);
 
 		for (ppc_index = 0;
	     	     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(config, gpc_index);
	     	     ppc_index++) {
-			cbm_cfg_size1 = attrib_cb_default_size *
-				nvgpu_gr_config_get_pes_tpc_count(config,
-					gpc_index, ppc_index);
-			cbm_cfg_size2 = alpha_cb_default_size *
+			u32 pes_tpc_count =
+				nvgpu_gr_config_get_pes_tpc_count(config,
+					gpc_index, ppc_index);
+			u32 ppc_posn = nvgpu_safe_mult_u32(ppc_in_gpc_stride,
+					ppc_index);
+			u32 sum_temp_pcc = nvgpu_safe_add_u32(temp, ppc_posn);
+
+			cbm_cfg_size1 =
+				nvgpu_safe_mult_u32(attrib_cb_default_size,
+					pes_tpc_count);
+			cbm_cfg_size2 =
+				nvgpu_safe_mult_u32(alpha_cb_default_size,
+					pes_tpc_count);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
-				ppc_in_gpc_stride * ppc_index,
+				nvgpu_safe_add_u32(
+					gr_gpc0_ppc0_cbm_beta_cb_size_r(),
+					sum_temp_pcc),
 				cbm_cfg_size1, patch);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
-				ppc_in_gpc_stride * ppc_index,
+				nvgpu_safe_add_u32(
+					gr_gpc0_ppc0_cbm_beta_cb_offset_r(),
+					sum_temp_pcc),
 				attrib_offset_in_chunk, patch);
 
-			attrib_offset_in_chunk += attrib_cb_size *
-				nvgpu_gr_config_get_pes_tpc_count(config,
-					gpc_index, ppc_index);
+			attrib_offset_in_chunk +=
+				nvgpu_safe_mult_u32(attrib_cb_size,
+					pes_tpc_count);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
-				ppc_in_gpc_stride * ppc_index,
+				nvgpu_safe_add_u32(
+					gr_gpc0_ppc0_cbm_alpha_cb_size_r(),
+					sum_temp_pcc),
 				cbm_cfg_size2, patch);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
-				ppc_in_gpc_stride * ppc_index,
+				nvgpu_safe_add_u32(
+					gr_gpc0_ppc0_cbm_alpha_cb_offset_r(),
+					sum_temp_pcc),
 				alpha_offset_in_chunk, patch);
 
-			alpha_offset_in_chunk += alpha_cb_size *
-				nvgpu_gr_config_get_pes_tpc_count(config,
-					gpc_index, ppc_index);
+			alpha_offset_in_chunk +=
+				nvgpu_safe_mult_u32(alpha_cb_size,
+					pes_tpc_count);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
+				gr_gpcs_swdx_tc_beta_cb_size_r(
+					nvgpu_safe_add_u32(ppc_index, temp2)),
 				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) |
 				gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3U),
 				patch);
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c
index f059decb2..c9ede8dde 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c
@@ -91,18 +91,20 @@ int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 	u32 i, j;
 	u32 tpc_index, gpc_index;
 	u32 max_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
+	u32 tpc_cnt = nvgpu_safe_sub_u32(
+			nvgpu_gr_config_get_tpc_count(gr_config), 1U);
 
 	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
-	for (i = 0U;
-	     i <= ((nvgpu_gr_config_get_tpc_count(gr_config) - 1U) / 4U);
-	     i++) {
+	for (i = 0U; i <= (tpc_cnt / 4U); i++) {
 		u32 reg = 0;
-		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
-			gr_cwd_gpc_tpc_id_tpc0_s();
+		u32 bit_stride = nvgpu_safe_add_u32(
+				gr_cwd_gpc_tpc_id_gpc0_s(),
+				gr_cwd_gpc_tpc_id_tpc0_s());
 
 		for (j = 0U; j < 4U; j++) {
-			u32 sm_id = (i * 4U) + j;
+			u32 sm_id = nvgpu_safe_mult_u32(i, 4U) + j;
 			u32 bits;
+			u32 index = 0U;
 			struct nvgpu_sm_info *sm_info;
 
 			if (sm_id >=
@@ -118,10 +120,15 @@ int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
 				gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
-			reg |= bits << (j * bit_stride);
+			reg |= bits << nvgpu_safe_mult_u32(j, bit_stride);
 
-			tpc_sm_id[gpc_index + max_gpcs * ((tpc_index & 4U) >> 2U)]
-				|= sm_id << (bit_stride * (tpc_index & 3U));
+			index = nvgpu_safe_mult_u32(max_gpcs,
+					((tpc_index & 4U) >> 2U));
+			index = nvgpu_safe_add_u32(gpc_index, index);
+			tpc_sm_id[index]
+				|= (sm_id <<
+				    nvgpu_safe_mult_u32(bit_stride,
+						(tpc_index & 3U)));
 		}
 		nvgpu_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
 	}
@@ -189,7 +196,7 @@ int gp10b_gr_init_wait_empty(struct gk20a *g)
 			return 0;
 		}
 
-		nvgpu_usleep_range(delay, delay * 2U);
+		nvgpu_usleep_range(delay, nvgpu_safe_mult_u32(delay, 2U));
 		delay = min_t(u32, delay << 1, POLL_DELAY_MAX_US);
 	} while (nvgpu_timeout_expired(&timeout) == 0);
 
@@ -255,16 +262,20 @@ u32 gp10b_gr_init_get_alpha_cb_default_size(struct gk20a *g)
 
 u32 gp10b_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g)
 {
-	return g->ops.gr.init.get_attrib_cb_default_size(g) +
-		(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+	return nvgpu_safe_add_u32(
+		g->ops.gr.init.get_attrib_cb_default_size(g),
+		nvgpu_safe_sub_u32(
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }
 
 u32 gp10b_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g)
 {
-	return g->ops.gr.init.get_attrib_cb_default_size(g) +
-		(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+	return nvgpu_safe_add_u32(
+		g->ops.gr.init.get_attrib_cb_default_size(g),
+		nvgpu_safe_sub_u32(
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }
 
 u32 gp10b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count)
@@ -284,11 +295,17 @@ u32 gp10b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
 {
 	u32 size;
 
-	size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) *
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
+	size = nvgpu_safe_mult_u32(
+		g->ops.gr.init.get_attrib_cb_size(g, tpc_count),
+		nvgpu_safe_mult_u32(
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(),
+			max_tpc));
 
-	size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) *
-		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc;
+	size += nvgpu_safe_mult_u32(
+		g->ops.gr.init.get_alpha_cb_size(g, tpc_count),
+		nvgpu_safe_mult_u32(
+			gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v(),
+			max_tpc));
 
 	size = ALIGN(size, 128);
 
@@ -326,8 +343,9 @@ void gp10b_gr_init_commit_global_bundle_cb(struct gk20a *g,
 		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
 
 	/* data for state_limit */
-	data = (g->ops.gr.init.get_bundle_cb_default_size(g) *
-		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
+	data = nvgpu_safe_mult_u32(
+		g->ops.gr.init.get_bundle_cb_default_size(g),
+		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
 		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
 
 	data = min_t(u32, data, g->ops.gr.init.get_min_gpm_fifo_depth(g));
@@ -367,7 +385,7 @@ void gp10b_gr_init_commit_global_pagepool(struct gk20a *g,
 		addr, size);
 
 	pp_addr = (u32)addr;
-	pp_size = (u32)size;
+	pp_size = nvgpu_safe_cast_u64_to_u32(size);
 	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
 		gr_scc_pagepool_base_addr_39_8_f(pp_addr), patch);
 
@@ -395,8 +413,8 @@ void gp10b_gr_init_commit_global_attrib_cb(struct gk20a *g,
 	addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v();
 
 	if (nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va != 0ULL) {
-		attrBufferSize =
-			U32(nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size);
+		attrBufferSize = nvgpu_safe_cast_u64_to_u32(
+			nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size);
 	} else {
 		attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g,
 			tpc_count, max_tpc);
 	}
@@ -457,7 +475,7 @@ void gp10b_gr_init_commit_global_cb_manager(struct gk20a *g,
 	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(),
 		alpha_cb_default_size, patch);
 
-	pd_ab_max_output = (alpha_cb_default_size *
+	pd_ab_max_output = nvgpu_safe_mult_u32(alpha_cb_default_size,
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
 		gr_pd_ab_dist_cfg1_max_output_granularity_v();
 
@@ -465,63 +483,78 @@ void gp10b_gr_init_commit_global_cb_manager(struct gk20a *g,
 		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
 		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
 
-	attrib_offset_in_chunk = alpha_offset_in_chunk +
-		nvgpu_gr_config_get_tpc_count(config) * alpha_cb_size;
+	attrib_offset_in_chunk = nvgpu_safe_add_u32(alpha_offset_in_chunk,
+		nvgpu_safe_mult_u32(
+			nvgpu_gr_config_get_tpc_count(config),
+			alpha_cb_size));
 
 	for (gpc_index = 0;
	     gpc_index < nvgpu_gr_config_get_gpc_count(config);
	     gpc_index++) {
-		temp = gpc_stride * gpc_index;
-		temp2 = num_pes_per_gpc * gpc_index;
+		temp = nvgpu_safe_mult_u32(gpc_stride, gpc_index);
+		temp2 = nvgpu_safe_mult_u32(num_pes_per_gpc, gpc_index);
 		for (ppc_index = 0;
	     	     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(config, gpc_index);
	     	     ppc_index++) {
-			cbm_cfg_size_beta = cb_attrib_cache_size_init *
+			u32 pes_tpc_count =
 				nvgpu_gr_config_get_pes_tpc_count(config,
-					gpc_index, ppc_index);
-			cbm_cfg_size_alpha = alpha_cb_default_size *
-				nvgpu_gr_config_get_pes_tpc_count(config,
-					gpc_index, ppc_index);
-			cbm_cfg_size_steadystate = attrib_cb_default_size *
-				nvgpu_gr_config_get_pes_tpc_count(config,
-					gpc_index, ppc_index);
+					gpc_index, ppc_index);
+			u32 ppc_posn = nvgpu_safe_mult_u32(ppc_in_gpc_stride,
+					ppc_index);
+			u32 sum_temp_pcc = nvgpu_safe_add_u32(temp, ppc_posn);
+
+			cbm_cfg_size_beta =
+				nvgpu_safe_mult_u32(
+					cb_attrib_cache_size_init,
+					pes_tpc_count);
+			cbm_cfg_size_alpha =
+				nvgpu_safe_mult_u32(alpha_cb_default_size,
+					pes_tpc_count);
+			cbm_cfg_size_steadystate =
+				nvgpu_safe_mult_u32(attrib_cb_default_size,
+					pes_tpc_count);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
-				ppc_in_gpc_stride * ppc_index,
+				nvgpu_safe_add_u32(
+					gr_gpc0_ppc0_cbm_beta_cb_size_r(),
+					sum_temp_pcc),
 				cbm_cfg_size_beta, patch);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
-				ppc_in_gpc_stride * ppc_index,
+				nvgpu_safe_add_u32(
+					gr_gpc0_ppc0_cbm_beta_cb_offset_r(),
+					sum_temp_pcc),
 				attrib_offset_in_chunk, patch);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
-				ppc_in_gpc_stride * ppc_index,
-				cbm_cfg_size_steadystate,
-				patch);
+				nvgpu_safe_add_u32(
+					gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r(),
+					sum_temp_pcc),
+				cbm_cfg_size_steadystate, patch);
 
-			attrib_offset_in_chunk += attrib_size_in_chunk *
-				nvgpu_gr_config_get_pes_tpc_count(config,
-					gpc_index, ppc_index);
+			attrib_offset_in_chunk +=
+				nvgpu_safe_mult_u32(attrib_size_in_chunk,
+					pes_tpc_count);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
-				ppc_in_gpc_stride * ppc_index,
+				nvgpu_safe_add_u32(
+					gr_gpc0_ppc0_cbm_alpha_cb_size_r(),
+					sum_temp_pcc),
 				cbm_cfg_size_alpha, patch);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
-				ppc_in_gpc_stride * ppc_index,
+				nvgpu_safe_add_u32(
+					gr_gpc0_ppc0_cbm_alpha_cb_offset_r(),
+					sum_temp_pcc),
 				alpha_offset_in_chunk, patch);
 
-			alpha_offset_in_chunk += alpha_cb_size *
-				nvgpu_gr_config_get_pes_tpc_count(config,
-					gpc_index, ppc_index);
+			alpha_offset_in_chunk +=
+				nvgpu_safe_mult_u32(alpha_cb_size,
+					pes_tpc_count);
 
 			nvgpu_gr_ctx_patch_write(g, gr_ctx,
-				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
+				gr_gpcs_swdx_tc_beta_cb_size_r(
+					nvgpu_safe_add_u32(ppc_index, temp2)),
 				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
 				patch);
 		}
@@ -530,21 +563,25 @@ void gp10b_gr_init_commit_global_cb_manager(struct gk20a *g,
 
 u32 gp10b_gr_init_get_ctx_spill_size(struct gk20a *g)
 {
-	return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
-		gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+	return nvgpu_safe_mult_u32(
+		gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(),
+		gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v());
 }
 
 u32 gp10b_gr_init_get_ctx_pagepool_size(struct gk20a *g)
 {
-	return g->ops.gr.init.pagepool_default_size(g) *
-		gr_scc_pagepool_total_pages_byte_granularity_v();
+	return nvgpu_safe_mult_u32(
+		g->ops.gr.init.pagepool_default_size(g),
+		gr_scc_pagepool_total_pages_byte_granularity_v());
 }
 
 u32 gp10b_gr_init_get_ctx_betacb_size(struct gk20a *g)
 {
-	return g->ops.gr.init.get_attrib_cb_default_size(g) +
-		(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+	return nvgpu_safe_add_u32(
+		g->ops.gr.init.get_attrib_cb_default_size(g),
+		nvgpu_safe_sub_u32(
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }
 
 u32 gp10b_gr_init_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size,
@@ -553,8 +590,11 @@ u32 gp10b_gr_init_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size,
 	u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g, tpc_count);
 	u32 size;
 
-	size = (betacb_size + alpha_cb_size) *
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
+	size = nvgpu_safe_mult_u32(
+		nvgpu_safe_add_u32(betacb_size, alpha_cb_size),
+		nvgpu_safe_mult_u32(
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(),
+			max_tpc));
 
 	return ALIGN(size, 128);
 }
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.c
index 7913988b2..9349d82c5 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.c
@@ -21,6 +21,7 @@
  */
 
 #include
+#include
 #include
 
 #include "gr_init_gv100.h"
@@ -64,13 +65,16 @@ u32 gv100_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g)
 
 u32 gv100_gr_init_get_ctx_spill_size(struct gk20a *g)
 {
-	return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
-		gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+	return nvgpu_safe_mult_u32(
+		gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(),
+		gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v());
 }
 
 u32 gv100_gr_init_get_ctx_betacb_size(struct gk20a *g)
 {
-	return g->ops.gr.init.get_attrib_cb_default_size(g) +
-		(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+	return nvgpu_safe_add_u32(
+		g->ops.gr.init.get_attrib_cb_default_size(g),
+		nvgpu_safe_sub_u32(
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c
index ed39e2b86..1703bf78a 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c
@@ -73,16 +73,21 @@ static int gr_gv11b_ecc_scrub_is_done(struct gk20a *g,
 	}
 
 	for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) {
-		gpc_offset = gpc_stride * gpc;
+		gpc_offset = nvgpu_safe_mult_u32(gpc_stride, gpc);
 
 		for (tpc = 0;
	     	     tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc);
	     	     tpc++) {
-			tpc_offset = tpc_in_gpc_stride * tpc;
+			tpc_offset = nvgpu_safe_mult_u32(tpc_in_gpc_stride,
+							 tpc);
 
 			do {
 				val = nvgpu_readl(g,
-					gpc_offset + tpc_offset + scrub_reg);
+					nvgpu_safe_add_u32(
+						nvgpu_safe_add_u32(
+							gpc_offset,
+							tpc_offset),
+						scrub_reg));
 				if ((val & scrub_mask) == scrub_done) {
 					break;
 				}
@@ -287,7 +292,7 @@ u32 gv11b_gr_init_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc,
 		tpc_new += nvgpu_gr_config_get_pes_tpc_count(gr_config, gpc,
			pes);
 	}
-	temp = (BIT32(tpc) - 1U) &
+	temp = nvgpu_safe_sub_u32(BIT32(tpc), 1U) &
 		nvgpu_gr_config_get_pes_tpc_mask(gr_config, gpc, pes);
 	temp = (u32)hweight32(temp);
 	tpc_new += temp;
@@ -375,9 +380,10 @@ void gv11b_gr_init_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid,
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
					GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 gpc_offset = gpc_stride * gpc;
+	u32 gpc_offset = nvgpu_safe_mult_u32(gpc_stride, gpc);
 	u32 global_tpc_index;
 	u32 tpc_offset;
+	u32 offset_sum = 0U;
 
 	struct nvgpu_sm_info *sm_info =
 		nvgpu_gr_config_get_sm_info(gr_config, smid);
@@ -385,13 +391,19 @@ void gv11b_gr_init_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid,
 		nvgpu_gr_config_get_sm_info_global_tpc_index(sm_info);
 
 	tpc = g->ops.gr.init.get_nonpes_aware_tpc(g, gpc, tpc, gr_config);
-	tpc_offset = tpc_in_gpc_stride * tpc;
+	tpc_offset = nvgpu_safe_mult_u32(tpc_in_gpc_stride, tpc);
 
-	nvgpu_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
-		gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
-	nvgpu_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
-		gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index));
-	nvgpu_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
+	offset_sum = nvgpu_safe_add_u32(gpc_offset, tpc_offset);
+	nvgpu_writel(g,
+		nvgpu_safe_add_u32(gr_gpc0_tpc0_sm_cfg_r(), offset_sum),
+		gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
+	nvgpu_writel(g,
+		nvgpu_safe_add_u32(
+			gr_gpc0_gpm_pd_sm_id_r(tpc), gpc_offset),
+		gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index));
+	nvgpu_writel(g,
+		nvgpu_safe_add_u32(
+			gr_gpc0_tpc0_pe_cfg_smid_r(), offset_sum),
 		gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index));
 }
 
@@ -403,22 +415,26 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
 	u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
 	u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g);
+	u32 tpc_cnt = nvgpu_safe_sub_u32(
+			nvgpu_gr_config_get_tpc_count(gr_config), 1U);
 
 	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
 	for (i = 0U;
-	     i <= ((nvgpu_gr_config_get_tpc_count(gr_config) - 1U) / 4U);
+	     i <= (tpc_cnt / 4U);
	     i++) {
 		u32 reg = 0;
-		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
-			gr_cwd_gpc_tpc_id_tpc0_s();
+		u32 bit_stride = nvgpu_safe_add_u32(
+				gr_cwd_gpc_tpc_id_gpc0_s(),
+				gr_cwd_gpc_tpc_id_tpc0_s());
 
 		for (j = 0U; j < 4U; j++) {
 			u32 sm_id;
 			u32 bits;
 			struct nvgpu_sm_info *sm_info;
+			u32 index = 0U;
 
 			tpc_id = (i << 2) + j;
-			sm_id = tpc_id * sm_per_tpc;
+			sm_id = nvgpu_safe_mult_u32(tpc_id, sm_per_tpc);
 
 			if (sm_id >= no_of_sm) {
 				break;
 			}
@@ -432,10 +448,15 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
 				gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
-			reg |= bits << (j * bit_stride);
+			reg |= bits << nvgpu_safe_mult_u32(j, bit_stride);
 
-			tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4U)
-				>> 2U))] |= tpc_id << tpc_index * bit_stride;
+			index = nvgpu_safe_mult_u32(num_gpcs,
+					((tpc_index & 4U) >> 2U));
+			index = nvgpu_safe_add_u32(gpc_index, index);
+			tpc_sm_id[index] |= (tpc_id <<
+					nvgpu_safe_mult_u32(
+						tpc_index,
+						bit_stride));
 		}
 		nvgpu_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
 	}
@@ -462,7 +483,7 @@ void gv11b_gr_init_rop_mapping(struct gk20a *g,
 	u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
 	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
					GPU_LIT_NUM_TPC_PER_GPC);
-	u32 num_tpcs = num_gpcs * num_tpc_per_gpc;
+	u32 num_tpcs = nvgpu_safe_mult_u32(num_gpcs, num_tpc_per_gpc);
 
 	nvgpu_log_fn(g, " ");
 
@@ -510,7 +531,7 @@ void gv11b_gr_init_rop_mapping(struct gk20a *g,
 		nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map_r(mapreg_num), map);
 		nvgpu_writel(g, gr_rstr2d_gpc_map_r(mapreg_num), map);
 
-		base = base + GR_TPCS_INFO_FOR_MAPREGISTER;
+		base = nvgpu_safe_add_u32(base, GR_TPCS_INFO_FOR_MAPREGISTER);
 	}
 
 	nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
@@ -523,14 +544,14 @@ void gv11b_gr_init_rop_mapping(struct gk20a *g,
 		tpc_cnt = nvgpu_gr_config_get_tpc_count(gr_config);
 		nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i),
 			gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f(
-				(BIT32(j) % tpc_cnt)) |
+				(BIT32(j) % tpc_cnt)) |
 			gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f(
-				(BIT32(j + 1U) % tpc_cnt)) |
+				(BIT32(nvgpu_safe_add_u32(j, 1U)) % tpc_cnt)) |
 			gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f(
-				(BIT32(j + 2U) % tpc_cnt)) |
+				(BIT32(nvgpu_safe_add_u32(j, 2U)) % tpc_cnt)) |
 			gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f(
-				(BIT32(j + 3U) % tpc_cnt)));
-		j = j + 4U;
+				(BIT32(nvgpu_safe_add_u32(j, 3U)) % tpc_cnt)));
+		j = nvgpu_safe_add_u32(j, 4U);
 	}
 
 	nvgpu_writel(g, gr_rstr2d_map_table_cfg_r(),
@@ -713,11 +734,17 @@ u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
 {
 	u32 size;
 
-	size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) *
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
+	size = nvgpu_safe_mult_u32(
+		g->ops.gr.init.get_attrib_cb_size(g, tpc_count),
+		nvgpu_safe_mult_u32(
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(),
+			max_tpc));
 
-	size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) *
-		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc;
+	size += nvgpu_safe_mult_u32(
+		g->ops.gr.init.get_alpha_cb_size(g, tpc_count),
+		nvgpu_safe_mult_u32(
+			gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v(),
+			max_tpc));
 
 	size = ALIGN(size, 128);
 
@@ -737,8 +764,8 @@ void gv11b_gr_init_commit_global_attrib_cb(struct gk20a *g,
 	addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v();
 
 	if (nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va != 0ULL) {
-		attrBufferSize =
-			U32(nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size);
+		attrBufferSize = nvgpu_safe_cast_u64_to_u32(
+			nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size);
 	} else {
 		attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g,
 			tpc_count, max_tpc);
 	}
@@ -823,15 +850,18 @@ int gv11b_gr_init_load_sw_veid_bundle(struct gk20a *g,
 
 u32 gv11b_gr_init_get_ctx_spill_size(struct gk20a *g)
 {
-	return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
-		gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+	return nvgpu_safe_mult_u32(
+		gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(),
+		gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v());
 }
 
 u32 gv11b_gr_init_get_ctx_betacb_size(struct gk20a *g)
 {
-	return g->ops.gr.init.get_attrib_cb_default_size(g) +
-		(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+	return nvgpu_safe_add_u32(
+		g->ops.gr.init.get_attrib_cb_default_size(g),
+		nvgpu_safe_sub_u32(
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }
 
 void gv11b_gr_init_commit_ctxsw_spill(struct gk20a *g,
@@ -927,7 +957,7 @@ u32 gv11b_gr_init_get_patch_slots(struct gk20a *g,
 	/*
	 * Increase the size to accommodate for additional TPC partition update
	 */
-	size += 2U * slot_size;
+	size += nvgpu_safe_mult_u32(2U, slot_size);
 
 	return size;
 }
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c
index 3d56cdb75..f69b13a3c 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -33,9 +34,11 @@
 
 u32 tu104_gr_init_get_rtv_cb_size(struct gk20a *g)
 {
-	return (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
-		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()) *
-		gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
+	return nvgpu_safe_mult_u32(
+		nvgpu_safe_add_u32(
+			gr_scc_rm_rtv_cb_size_div_256b_default_f(),
+			gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()),
+		gr_scc_bundle_cb_size_div_256b_byte_granularity_v());
 }
 
 static void tu104_gr_init_patch_rtv_cb(struct gk20a *g,
@@ -56,7 +59,8 @@ static void tu104_gr_init_patch_rtv_cb(struct gk20a *g,
 void tu104_gr_init_commit_rtv_cb(struct gk20a *g, u64 addr,
	struct nvgpu_gr_ctx *gr_ctx, bool patch)
 {
-	u32 size = (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+	u32 size = nvgpu_safe_add_u32(
+		gr_scc_rm_rtv_cb_size_div_256b_default_f(),
 		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f());
 
 	addr = addr >> gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f();
@@ -75,9 +79,10 @@ void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g,
 
 	nvgpu_log_fn(g, " ");
 
-	rtv_cb_size =
-		(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
-		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
+	rtv_cb_size = nvgpu_safe_add_u32(
+		nvgpu_safe_add_u32(
+			gr_scc_rm_rtv_cb_size_div_256b_default_f(),
+			gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()),
 		gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f());
 	gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f();
 
@@ -168,22 +173,28 @@ int tu104_gr_init_load_sw_bundle64(struct gk20a *g,
 
 u32 tu104_gr_init_get_ctx_spill_size(struct gk20a *g)
 {
-	return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
-		gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+	return nvgpu_safe_mult_u32(
+		gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(),
+		gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v());
 }
 
 u32 tu104_gr_init_get_ctx_betacb_size(struct gk20a *g)
 {
-	return g->ops.gr.init.get_attrib_cb_default_size(g) +
-		(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+	return nvgpu_safe_add_u32(
+		g->ops.gr.init.get_attrib_cb_default_size(g),
+		nvgpu_safe_sub_u32(
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }
 
 u32 tu104_gr_init_get_gfxp_rtv_cb_size(struct gk20a *g)
 {
-	return (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
-		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
-		gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()) *
-		gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v();
+	return nvgpu_safe_mult_u32(
+		nvgpu_safe_add_u32(
+			nvgpu_safe_add_u32(
+				gr_scc_rm_rtv_cb_size_div_256b_default_f(),
+				gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()),
+			gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()),
+		gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v());
 }
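For reviewers who want the intent of the conversion spelled out: the nvgpu_safe_* helpers used throughout this patch behave like plain u32 arithmetic except that they trap instead of silently wrapping on overflow/underflow, and nvgpu_safe_cast_u64_to_u32() traps instead of silently truncating. The snippet below is only an illustrative sketch of those assumed semantics in standalone C, with SAFE_TRAP() standing in for the driver's BUG()-style error path; it is not the actual nvgpu header.

/*
 * Sketch of assumed nvgpu_safe_* semantics (illustrative, not the real
 * driver code): each helper performs the u32 operation and traps on
 * overflow, underflow, or truncation instead of wrapping silently.
 */
#include <stdint.h>
#include <stdlib.h>

typedef uint32_t u32;
typedef uint64_t u64;

/* Stand-in for the driver's fatal error handler; an assumption of this sketch. */
#define SAFE_TRAP()	abort()

static inline u32 nvgpu_safe_add_u32(u32 a, u32 b)
{
	if ((UINT32_MAX - a) < b) {	/* a + b would exceed UINT32_MAX */
		SAFE_TRAP();
	}
	return a + b;
}

static inline u32 nvgpu_safe_sub_u32(u32 a, u32 b)
{
	if (a < b) {			/* result would wrap below zero */
		SAFE_TRAP();
	}
	return a - b;
}

static inline u32 nvgpu_safe_mult_u32(u32 a, u32 b)
{
	if ((b != 0U) && (a > (UINT32_MAX / b))) {	/* product overflows 32 bits */
		SAFE_TRAP();
	}
	return a * b;
}

static inline u32 nvgpu_safe_cast_u64_to_u32(u64 v)
{
	if (v > (u64)UINT32_MAX) {	/* value does not fit in 32 bits */
		SAFE_TRAP();
	}
	return (u32)v;
}

With those semantics, an expression such as gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset becomes nvgpu_safe_add_u32(gr_gpc0_tpc0_sm_cfg_r(), offset_sum): the register offset is computed once, and an overflow is caught at the arithmetic itself rather than being written out as a bogus register address.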