Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Fix CERT INT30-C errors in hal.gr.init unit
Add fixes for CERT INT30-C errors in hal.gr.init unit.

cert_violation: Unsigned integer operation may wrap.

Use safe_ops macros to perform addition, subtraction, multiplication
and u64 to u32 casting.

Jira NVGPU-3411

Change-Id: Ib7d472c7a5903263ab50c2769387f2a68bf7c695
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2122289
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: 1f85c3190b
Commit: d652c16fa3
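The diff below routes every arithmetic step through nvgpu_safe_* helpers from <nvgpu/safe_ops.h>. Those helpers are not shown in this change; as a rough sketch of the intent only (hypothetical names and a guessed failure policy, not the actual nvgpu implementation), INT30-C style wrap checking for addition, subtraction, and u64-to-u32 casting could look like this:

/*
 * Hypothetical illustration only -- the real helpers live in
 * <nvgpu/safe_ops.h> and are not part of this commit. The point is to
 * detect unsigned wrap/truncation instead of silently wrapping.
 */
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/types.h>

static inline u32 example_safe_add_u32(u32 a, u32 b)
{
	/* An unsigned add wrapped iff the result is smaller than an operand. */
	u32 sum = a + b;

	BUG_ON(sum < a);
	return sum;
}

static inline u32 example_safe_sub_u32(u32 a, u32 b)
{
	/* Unsigned subtraction wraps when the subtrahend is larger. */
	BUG_ON(b > a);
	return a - b;
}

static inline u32 example_safe_cast_u64_to_u32(u64 v)
{
	/* A narrowing cast is only safe when the value fits in 32 bits. */
	BUG_ON(v > U32_MAX);
	return (u32)v;
}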
@@ -179,15 +179,21 @@ void gm20b_gr_init_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid,
 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
 GPU_LIT_TPC_IN_GPC_STRIDE);
- u32 gpc_offset = gpc_stride * gpc;
- u32 tpc_offset = tpc_in_gpc_stride * tpc;
+ u32 gpc_offset = nvgpu_safe_mult_u32(gpc_stride, gpc);
+ u32 tpc_offset = nvgpu_safe_mult_u32(tpc_in_gpc_stride, tpc);
+ u32 offset_sum = nvgpu_safe_add_u32(gpc_offset, tpc_offset);

- nvgpu_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
- gr_gpc0_tpc0_sm_cfg_sm_id_f(smid));
- nvgpu_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
- gr_gpc0_gpm_pd_sm_id_id_f(smid));
- nvgpu_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
- gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
+ nvgpu_writel(g,
+ nvgpu_safe_add_u32(gr_gpc0_tpc0_sm_cfg_r(), offset_sum),
+ gr_gpc0_tpc0_sm_cfg_sm_id_f(smid));
+ nvgpu_writel(g,
+ nvgpu_safe_add_u32(gr_gpc0_gpm_pd_sm_id_r(tpc),
+ gpc_offset),
+ gr_gpc0_gpm_pd_sm_id_id_f(smid));
+ nvgpu_writel(g,
+ nvgpu_safe_add_u32(gr_gpc0_tpc0_pe_cfg_smid_r(),
+ offset_sum),
+ gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
 }

 u32 gm20b_gr_init_get_sm_id_size(void)
@@ -200,17 +206,21 @@ int gm20b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 {
 u32 i, j;
 u32 tpc_index, gpc_index;
+ u32 tpc_cnt = nvgpu_safe_sub_u32(
+ nvgpu_gr_config_get_tpc_count(gr_config), 1U);

 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
 for (i = 0U;
- i <= ((nvgpu_gr_config_get_tpc_count(gr_config) - 1U) / 4U);
+ i <= (tpc_cnt / 4U);
 i++) {
 u32 reg = 0;
- u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
- gr_cwd_gpc_tpc_id_tpc0_s();
+ u32 bit_stride = nvgpu_safe_add_u32(
+ gr_cwd_gpc_tpc_id_gpc0_s(),
+ gr_cwd_gpc_tpc_id_tpc0_s());

 for (j = 0U; j < 4U; j++) {
- u32 sm_id = (i * 4U) + j;
+ u32 sm_id = nvgpu_safe_add_u32(
+ nvgpu_safe_mult_u32(i, 4U), j);
 u32 bits;
 struct nvgpu_sm_info *sm_info;

@@ -227,10 +237,11 @@ int gm20b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,

 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
 gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
- reg |= bits << (j * bit_stride);
+ reg |= bits << nvgpu_safe_mult_u32(j, bit_stride);

 tpc_sm_id[gpc_index] |=
- (sm_id << tpc_index * bit_stride);
+ (sm_id <<
+ nvgpu_safe_mult_u32(tpc_index, bit_stride));
 }
 nvgpu_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
 }
@@ -555,7 +566,7 @@ int gm20b_gr_init_wait_idle(struct gk20a *g)
 return 0;
 }

- nvgpu_usleep_range(delay, delay * 2U);
+ nvgpu_usleep_range(delay, nvgpu_safe_mult_u32(delay, 2U));
 delay = min_t(u32, delay << 1, POLL_DELAY_MAX_US);

 } while (nvgpu_timeout_expired(&timeout) == 0);
@@ -593,7 +604,7 @@ int gm20b_gr_init_wait_fe_idle(struct gk20a *g)
 return 0;
 }

- nvgpu_usleep_range(delay, delay * 2U);
+ nvgpu_usleep_range(delay, nvgpu_safe_mult_u32(delay, 2U));
 delay = min_t(u32, delay << 1, POLL_DELAY_MAX_US);
 } while (nvgpu_timeout_expired(&timeout) == 0);

@@ -831,14 +842,16 @@ u32 gm20b_gr_init_get_alpha_cb_default_size(struct gk20a *g)

 u32 gm20b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count)
 {
- return g->ops.gr.init.get_attrib_cb_default_size(g)
- + (g->ops.gr.init.get_attrib_cb_default_size(g) >> 1);
+ return nvgpu_safe_add_u32(
+ g->ops.gr.init.get_attrib_cb_default_size(g),
+ (g->ops.gr.init.get_attrib_cb_default_size(g) >> 1));
 }

 u32 gm20b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count)
 {
- return g->ops.gr.init.get_alpha_cb_default_size(g)
- + (g->ops.gr.init.get_alpha_cb_default_size(g) >> 1);
+ return nvgpu_safe_add_u32(
+ g->ops.gr.init.get_alpha_cb_default_size(g),
+ (g->ops.gr.init.get_alpha_cb_default_size(g) >> 1));
 }

 u32 gm20b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
@@ -846,25 +859,33 @@ u32 gm20b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
 {
 u32 size;

- size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) *
- gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
+ size = nvgpu_safe_mult_u32(
+ g->ops.gr.init.get_attrib_cb_size(g, tpc_count),
+ nvgpu_safe_mult_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(),
+ max_tpc));

- size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) *
- gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc;
+ size += nvgpu_safe_mult_u32(
+ g->ops.gr.init.get_alpha_cb_size(g, tpc_count),
+ nvgpu_safe_mult_u32(
+ gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v(),
+ max_tpc));

 return size;
 }

 u32 gm20b_gr_init_get_global_ctx_cb_buffer_size(struct gk20a *g)
 {
- return g->ops.gr.init.get_bundle_cb_default_size(g) *
- gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
+ return nvgpu_safe_mult_u32(
+ g->ops.gr.init.get_bundle_cb_default_size(g),
+ gr_scc_bundle_cb_size_div_256b_byte_granularity_v());
 }

 u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g)
 {
- return g->ops.gr.init.pagepool_default_size(g) *
- gr_scc_pagepool_total_pages_byte_granularity_v();
+ return nvgpu_safe_mult_u32(
+ g->ops.gr.init.pagepool_default_size(g),
+ gr_scc_pagepool_total_pages_byte_granularity_v());
 }

 void gm20b_gr_init_commit_global_bundle_cb(struct gk20a *g,
@@ -896,7 +917,8 @@ void gm20b_gr_init_commit_global_bundle_cb(struct gk20a *g,
 gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);

 /* data for state_limit */
- data = (g->ops.gr.init.get_bundle_cb_default_size(g) *
+ data = nvgpu_safe_mult_u32(
+ g->ops.gr.init.get_bundle_cb_default_size(g),
 gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();

@@ -937,7 +959,7 @@ void gm20b_gr_init_commit_global_pagepool(struct gk20a *g,
 addr, size);

 pp_addr = (u32)addr;
- pp_size = (u32)size;
+ pp_size = nvgpu_safe_cast_u64_to_u32(size);
 nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
 gr_scc_pagepool_base_addr_39_8_f(pp_addr), patch);

@@ -1011,7 +1033,7 @@ void gm20b_gr_init_commit_global_cb_manager(struct gk20a *g,
 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_default_size),
 patch);

- pd_ab_max_output = (alpha_cb_default_size *
+ pd_ab_max_output = nvgpu_safe_mult_u32(alpha_cb_default_size,
 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
 gr_pd_ab_dist_cfg1_max_output_granularity_v();

@@ -1019,55 +1041,70 @@ void gm20b_gr_init_commit_global_cb_manager(struct gk20a *g,
 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);

- alpha_offset_in_chunk = attrib_offset_in_chunk +
- nvgpu_gr_config_get_tpc_count(config) * attrib_cb_size;
+ alpha_offset_in_chunk =
+ nvgpu_safe_add_u32(attrib_offset_in_chunk,
+ nvgpu_safe_mult_u32(
+ nvgpu_gr_config_get_tpc_count(config),
+ attrib_cb_size));

 for (gpc_index = 0;
 gpc_index < nvgpu_gr_config_get_gpc_count(config);
 gpc_index++) {
- u32 temp = gpc_stride * gpc_index;
- u32 temp2 = num_pes_per_gpc * gpc_index;
+ u32 temp = nvgpu_safe_mult_u32(gpc_stride, gpc_index);
+ u32 temp2 = nvgpu_safe_mult_u32(num_pes_per_gpc, gpc_index);
 for (ppc_index = 0;
 ppc_index < nvgpu_gr_config_get_gpc_ppc_count(config,
 gpc_index);
 ppc_index++) {
- cbm_cfg_size1 = attrib_cb_default_size *
+ u32 pes_tpc_count =
 nvgpu_gr_config_get_pes_tpc_count(config,
 gpc_index, ppc_index);
- cbm_cfg_size2 = alpha_cb_default_size *
+ u32 ppc_posn = nvgpu_safe_mult_u32(ppc_in_gpc_stride,
+ ppc_index);
+ u32 sum_temp_pcc = nvgpu_safe_add_u32(temp, ppc_posn);
+
+ cbm_cfg_size1 =
+ nvgpu_safe_mult_u32(attrib_cb_default_size,
+ pes_tpc_count);
+ cbm_cfg_size2 =
+ nvgpu_safe_mult_u32(alpha_cb_default_size,
+ pes_tpc_count);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
- ppc_in_gpc_stride * ppc_index,
+ nvgpu_safe_add_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_r(),
+ sum_temp_pcc),
 cbm_cfg_size1, patch);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
- ppc_in_gpc_stride * ppc_index,
+ nvgpu_safe_add_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_offset_r(),
+ sum_temp_pcc),
 attrib_offset_in_chunk, patch);

- attrib_offset_in_chunk += attrib_cb_size *
- nvgpu_gr_config_get_pes_tpc_count(config,
- gpc_index, ppc_index);
+ attrib_offset_in_chunk +=
+ nvgpu_safe_mult_u32(attrib_cb_size,
+ pes_tpc_count);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
- ppc_in_gpc_stride * ppc_index,
+ nvgpu_safe_add_u32(
+ gr_gpc0_ppc0_cbm_alpha_cb_size_r(),
+ sum_temp_pcc),
 cbm_cfg_size2, patch);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
- ppc_in_gpc_stride * ppc_index,
+ nvgpu_safe_add_u32(
+ gr_gpc0_ppc0_cbm_alpha_cb_offset_r(),
+ sum_temp_pcc),
 alpha_offset_in_chunk, patch);

- alpha_offset_in_chunk += alpha_cb_size *
- nvgpu_gr_config_get_pes_tpc_count(config,
- gpc_index, ppc_index);
+ alpha_offset_in_chunk +=
+ nvgpu_safe_mult_u32(alpha_cb_size,
+ pes_tpc_count);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
+ gr_gpcs_swdx_tc_beta_cb_size_r(
+ nvgpu_safe_add_u32(ppc_index, temp2)),
 gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) |
 gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3U),
 patch);
@@ -91,18 +91,20 @@ int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 u32 i, j;
 u32 tpc_index, gpc_index;
 u32 max_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
+ u32 tpc_cnt = nvgpu_safe_sub_u32(
+ nvgpu_gr_config_get_tpc_count(gr_config), 1U);

 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
- for (i = 0U;
- i <= ((nvgpu_gr_config_get_tpc_count(gr_config) - 1U) / 4U);
- i++) {
+ for (i = 0U; i <= (tpc_cnt / 4U); i++) {
 u32 reg = 0;
- u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
- gr_cwd_gpc_tpc_id_tpc0_s();
+ u32 bit_stride = nvgpu_safe_add_u32(
+ gr_cwd_gpc_tpc_id_gpc0_s(),
+ gr_cwd_gpc_tpc_id_tpc0_s());

 for (j = 0U; j < 4U; j++) {
- u32 sm_id = (i * 4U) + j;
+ u32 sm_id = nvgpu_safe_mult_u32(i, 4U) + j;
 u32 bits;
+ u32 index = 0U;
 struct nvgpu_sm_info *sm_info;

 if (sm_id >=
@@ -118,10 +120,15 @@ int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,

 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
 gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
- reg |= bits << (j * bit_stride);
+ reg |= bits << nvgpu_safe_mult_u32(j, bit_stride);

- tpc_sm_id[gpc_index + max_gpcs * ((tpc_index & 4U) >> 2U)]
- |= sm_id << (bit_stride * (tpc_index & 3U));
+ index = nvgpu_safe_mult_u32(max_gpcs,
+ ((tpc_index & 4U) >> 2U));
+ index = nvgpu_safe_add_u32(gpc_index, index);
+ tpc_sm_id[index]
+ |= (sm_id <<
+ nvgpu_safe_mult_u32(bit_stride,
+ (tpc_index & 3U)));
 }
 nvgpu_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
 }
@@ -189,7 +196,7 @@ int gp10b_gr_init_wait_empty(struct gk20a *g)
 return 0;
 }

- nvgpu_usleep_range(delay, delay * 2U);
+ nvgpu_usleep_range(delay, nvgpu_safe_mult_u32(delay, 2U));
 delay = min_t(u32, delay << 1, POLL_DELAY_MAX_US);
 } while (nvgpu_timeout_expired(&timeout) == 0);

@@ -255,16 +262,20 @@ u32 gp10b_gr_init_get_alpha_cb_default_size(struct gk20a *g)

 u32 gp10b_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g)
 {
- return g->ops.gr.init.get_attrib_cb_default_size(g) +
- (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
- gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+ return nvgpu_safe_add_u32(
+ g->ops.gr.init.get_attrib_cb_default_size(g),
+ nvgpu_safe_sub_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }

 u32 gp10b_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g)
 {
- return g->ops.gr.init.get_attrib_cb_default_size(g) +
- (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
- gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+ return nvgpu_safe_add_u32(
+ g->ops.gr.init.get_attrib_cb_default_size(g),
+ nvgpu_safe_sub_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }

 u32 gp10b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count)
@@ -284,11 +295,17 @@ u32 gp10b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
 {
 u32 size;

- size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) *
- gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
+ size = nvgpu_safe_mult_u32(
+ g->ops.gr.init.get_attrib_cb_size(g, tpc_count),
+ nvgpu_safe_mult_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(),
+ max_tpc));

- size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) *
- gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc;
+ size += nvgpu_safe_mult_u32(
+ g->ops.gr.init.get_alpha_cb_size(g, tpc_count),
+ nvgpu_safe_mult_u32(
+ gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v(),
+ max_tpc));

 size = ALIGN(size, 128);

@@ -326,8 +343,9 @@ void gp10b_gr_init_commit_global_bundle_cb(struct gk20a *g,
 gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);

 /* data for state_limit */
- data = (g->ops.gr.init.get_bundle_cb_default_size(g) *
- gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
+ data = nvgpu_safe_mult_u32(
+ g->ops.gr.init.get_bundle_cb_default_size(g),
+ gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();

 data = min_t(u32, data, g->ops.gr.init.get_min_gpm_fifo_depth(g));
@@ -367,7 +385,7 @@ void gp10b_gr_init_commit_global_pagepool(struct gk20a *g,
 addr, size);

 pp_addr = (u32)addr;
- pp_size = (u32)size;
+ pp_size = nvgpu_safe_cast_u64_to_u32(size);
 nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
 gr_scc_pagepool_base_addr_39_8_f(pp_addr), patch);

@@ -395,8 +413,8 @@ void gp10b_gr_init_commit_global_attrib_cb(struct gk20a *g,
 addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v();

 if (nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va != 0ULL) {
- attrBufferSize =
- U32(nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size);
+ attrBufferSize = nvgpu_safe_cast_u64_to_u32(
+ nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size);
 } else {
 attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g,
 tpc_count, max_tpc);
@@ -457,7 +475,7 @@ void gp10b_gr_init_commit_global_cb_manager(struct gk20a *g,
 nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(),
 alpha_cb_default_size, patch);

- pd_ab_max_output = (alpha_cb_default_size *
+ pd_ab_max_output = nvgpu_safe_mult_u32(alpha_cb_default_size,
 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
 gr_pd_ab_dist_cfg1_max_output_granularity_v();

@@ -465,63 +483,78 @@ void gp10b_gr_init_commit_global_cb_manager(struct gk20a *g,
 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);

- attrib_offset_in_chunk = alpha_offset_in_chunk +
- nvgpu_gr_config_get_tpc_count(config) * alpha_cb_size;
+ attrib_offset_in_chunk = nvgpu_safe_add_u32(alpha_offset_in_chunk,
+ nvgpu_safe_mult_u32(
+ nvgpu_gr_config_get_tpc_count(config),
+ alpha_cb_size));

 for (gpc_index = 0;
 gpc_index < nvgpu_gr_config_get_gpc_count(config);
 gpc_index++) {
- temp = gpc_stride * gpc_index;
- temp2 = num_pes_per_gpc * gpc_index;
+ temp = nvgpu_safe_mult_u32(gpc_stride, gpc_index);
+ temp2 = nvgpu_safe_mult_u32(num_pes_per_gpc, gpc_index);
 for (ppc_index = 0;
 ppc_index < nvgpu_gr_config_get_gpc_ppc_count(config, gpc_index);
 ppc_index++) {
- cbm_cfg_size_beta = cb_attrib_cache_size_init *
+ u32 pes_tpc_count =
 nvgpu_gr_config_get_pes_tpc_count(config,
- gpc_index, ppc_index);
- cbm_cfg_size_alpha = alpha_cb_default_size *
- nvgpu_gr_config_get_pes_tpc_count(config,
- gpc_index, ppc_index);
- cbm_cfg_size_steadystate = attrib_cb_default_size *
- nvgpu_gr_config_get_pes_tpc_count(config,
- gpc_index, ppc_index);
+ gpc_index, ppc_index);
+ u32 ppc_posn = nvgpu_safe_mult_u32(ppc_in_gpc_stride,
+ ppc_index);
+ u32 sum_temp_pcc = nvgpu_safe_add_u32(temp, ppc_posn);
+
+ cbm_cfg_size_beta =
+ nvgpu_safe_mult_u32(
+ cb_attrib_cache_size_init,
+ pes_tpc_count);
+ cbm_cfg_size_alpha =
+ nvgpu_safe_mult_u32(alpha_cb_default_size,
+ pes_tpc_count);
+ cbm_cfg_size_steadystate =
+ nvgpu_safe_mult_u32(attrib_cb_default_size,
+ pes_tpc_count);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
- ppc_in_gpc_stride * ppc_index,
+ nvgpu_safe_add_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_r(),
+ sum_temp_pcc),
 cbm_cfg_size_beta, patch);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
- ppc_in_gpc_stride * ppc_index,
+ nvgpu_safe_add_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_offset_r(),
+ sum_temp_pcc),
 attrib_offset_in_chunk, patch);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
- ppc_in_gpc_stride * ppc_index,
- cbm_cfg_size_steadystate,
- patch);
+ nvgpu_safe_add_u32(
+ gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r(),
+ sum_temp_pcc),
+ cbm_cfg_size_steadystate, patch);

- attrib_offset_in_chunk += attrib_size_in_chunk *
- nvgpu_gr_config_get_pes_tpc_count(config,
- gpc_index, ppc_index);
+ attrib_offset_in_chunk +=
+ nvgpu_safe_mult_u32(attrib_size_in_chunk,
+ pes_tpc_count);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
- ppc_in_gpc_stride * ppc_index,
+ nvgpu_safe_add_u32(
+ gr_gpc0_ppc0_cbm_alpha_cb_size_r(),
+ sum_temp_pcc),
 cbm_cfg_size_alpha, patch);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
- ppc_in_gpc_stride * ppc_index,
+ nvgpu_safe_add_u32(
+ gr_gpc0_ppc0_cbm_alpha_cb_offset_r(),
+ sum_temp_pcc),
 alpha_offset_in_chunk, patch);

- alpha_offset_in_chunk += alpha_cb_size *
- nvgpu_gr_config_get_pes_tpc_count(config,
- gpc_index, ppc_index);
+ alpha_offset_in_chunk +=
+ nvgpu_safe_mult_u32(alpha_cb_size,
+ pes_tpc_count);

 nvgpu_gr_ctx_patch_write(g, gr_ctx,
- gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
+ gr_gpcs_swdx_tc_beta_cb_size_r(
+ nvgpu_safe_add_u32(ppc_index, temp2)),
 gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
 patch);
 }
@@ -530,21 +563,25 @@ void gp10b_gr_init_commit_global_cb_manager(struct gk20a *g,

 u32 gp10b_gr_init_get_ctx_spill_size(struct gk20a *g)
 {
- return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
- gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+ return nvgpu_safe_mult_u32(
+ gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(),
+ gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v());
 }

 u32 gp10b_gr_init_get_ctx_pagepool_size(struct gk20a *g)
 {
- return g->ops.gr.init.pagepool_default_size(g) *
- gr_scc_pagepool_total_pages_byte_granularity_v();
+ return nvgpu_safe_mult_u32(
+ g->ops.gr.init.pagepool_default_size(g),
+ gr_scc_pagepool_total_pages_byte_granularity_v());
 }

 u32 gp10b_gr_init_get_ctx_betacb_size(struct gk20a *g)
 {
- return g->ops.gr.init.get_attrib_cb_default_size(g) +
- (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
- gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+ return nvgpu_safe_add_u32(
+ g->ops.gr.init.get_attrib_cb_default_size(g),
+ nvgpu_safe_sub_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }

 u32 gp10b_gr_init_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size,
@@ -553,8 +590,11 @@ u32 gp10b_gr_init_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size,
 u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g, tpc_count);
 u32 size;

- size = (betacb_size + alpha_cb_size) *
- gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
+ size = nvgpu_safe_mult_u32(
+ nvgpu_safe_add_u32(betacb_size, alpha_cb_size),
+ nvgpu_safe_mult_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(),
+ max_tpc));

 return ALIGN(size, 128);
 }
@@ -21,6 +21,7 @@
 */

 #include <nvgpu/gk20a.h>
+ #include <nvgpu/safe_ops.h>
 #include <nvgpu/gr/ctx.h>

 #include "gr_init_gv100.h"
@@ -64,13 +65,16 @@ u32 gv100_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g)

 u32 gv100_gr_init_get_ctx_spill_size(struct gk20a *g)
 {
- return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
- gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+ return nvgpu_safe_mult_u32(
+ gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(),
+ gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v());
 }

 u32 gv100_gr_init_get_ctx_betacb_size(struct gk20a *g)
 {
- return g->ops.gr.init.get_attrib_cb_default_size(g) +
- (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
- gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+ return nvgpu_safe_add_u32(
+ g->ops.gr.init.get_attrib_cb_default_size(g),
+ nvgpu_safe_sub_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }
@@ -73,16 +73,21 @@ static int gr_gv11b_ecc_scrub_is_done(struct gk20a *g,
 }

 for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) {
- gpc_offset = gpc_stride * gpc;
+ gpc_offset = nvgpu_safe_mult_u32(gpc_stride, gpc);

 for (tpc = 0;
 tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc);
 tpc++) {
- tpc_offset = tpc_in_gpc_stride * tpc;
+ tpc_offset = nvgpu_safe_mult_u32(tpc_in_gpc_stride,
+ tpc);

 do {
 val = nvgpu_readl(g,
- gpc_offset + tpc_offset + scrub_reg);
+ nvgpu_safe_add_u32(
+ nvgpu_safe_add_u32(
+ gpc_offset,
+ tpc_offset),
+ scrub_reg));
 if ((val & scrub_mask) == scrub_done) {
 break;
 }
@@ -287,7 +292,7 @@ u32 gv11b_gr_init_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc,
 tpc_new += nvgpu_gr_config_get_pes_tpc_count(gr_config,
 gpc, pes);
 }
- temp = (BIT32(tpc) - 1U) &
+ temp = nvgpu_safe_sub_u32(BIT32(tpc), 1U) &
 nvgpu_gr_config_get_pes_tpc_mask(gr_config, gpc, pes);
 temp = (u32)hweight32(temp);
 tpc_new += temp;
@@ -375,9 +380,10 @@ void gv11b_gr_init_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid,
 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
 GPU_LIT_TPC_IN_GPC_STRIDE);
- u32 gpc_offset = gpc_stride * gpc;
+ u32 gpc_offset = nvgpu_safe_mult_u32(gpc_stride, gpc);
 u32 global_tpc_index;
 u32 tpc_offset;
+ u32 offset_sum = 0U;
 struct nvgpu_sm_info *sm_info =
 nvgpu_gr_config_get_sm_info(gr_config, smid);

@@ -385,13 +391,19 @@ void gv11b_gr_init_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid,
 nvgpu_gr_config_get_sm_info_global_tpc_index(sm_info);

 tpc = g->ops.gr.init.get_nonpes_aware_tpc(g, gpc, tpc, gr_config);
- tpc_offset = tpc_in_gpc_stride * tpc;
+ tpc_offset = nvgpu_safe_mult_u32(tpc_in_gpc_stride, tpc);

- nvgpu_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
- gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
- nvgpu_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
- gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index));
- nvgpu_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
+ offset_sum = nvgpu_safe_add_u32(gpc_offset, tpc_offset);
+ nvgpu_writel(g,
+ nvgpu_safe_add_u32(gr_gpc0_tpc0_sm_cfg_r(), offset_sum),
+ gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
+ nvgpu_writel(g,
+ nvgpu_safe_add_u32(
+ gr_gpc0_gpm_pd_sm_id_r(tpc), gpc_offset),
+ gr_gpc0_gpm_pd_sm_id_id_f(global_tpc_index));
+ nvgpu_writel(g,
+ nvgpu_safe_add_u32(
+ gr_gpc0_tpc0_pe_cfg_smid_r(), offset_sum),
 gr_gpc0_tpc0_pe_cfg_smid_value_f(global_tpc_index));
 }

@@ -403,22 +415,26 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
 u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g);
+ u32 tpc_cnt = nvgpu_safe_sub_u32(
+ nvgpu_gr_config_get_tpc_count(gr_config), 1U);

 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
 for (i = 0U;
- i <= ((nvgpu_gr_config_get_tpc_count(gr_config) - 1U) / 4U);
+ i <= (tpc_cnt / 4U);
 i++) {
 u32 reg = 0;
- u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
- gr_cwd_gpc_tpc_id_tpc0_s();
+ u32 bit_stride = nvgpu_safe_add_u32(
+ gr_cwd_gpc_tpc_id_gpc0_s(),
+ gr_cwd_gpc_tpc_id_tpc0_s());

 for (j = 0U; j < 4U; j++) {
 u32 sm_id;
 u32 bits;
 struct nvgpu_sm_info *sm_info;
+ u32 index = 0U;

 tpc_id = (i << 2) + j;
- sm_id = tpc_id * sm_per_tpc;
+ sm_id = nvgpu_safe_mult_u32(tpc_id, sm_per_tpc);

 if (sm_id >= no_of_sm) {
 break;
@@ -432,10 +448,15 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,

 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
 gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
- reg |= bits << (j * bit_stride);
+ reg |= bits << nvgpu_safe_mult_u32(j, bit_stride);

- tpc_sm_id[gpc_index + (num_gpcs * ((tpc_index & 4U)
- >> 2U))] |= tpc_id << tpc_index * bit_stride;
+ index = nvgpu_safe_mult_u32(num_gpcs,
+ ((tpc_index & 4U) >> 2U));
+ index = nvgpu_safe_add_u32(gpc_index, index);
+ tpc_sm_id[index] |= (tpc_id <<
+ nvgpu_safe_mult_u32(
+ tpc_index,
+ bit_stride));
 }
 nvgpu_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
 }
@@ -462,7 +483,7 @@ void gv11b_gr_init_rop_mapping(struct gk20a *g,
 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
 GPU_LIT_NUM_TPC_PER_GPC);
- u32 num_tpcs = num_gpcs * num_tpc_per_gpc;
+ u32 num_tpcs = nvgpu_safe_mult_u32(num_gpcs, num_tpc_per_gpc);

 nvgpu_log_fn(g, " ");

@@ -510,7 +531,7 @@ void gv11b_gr_init_rop_mapping(struct gk20a *g,
 nvgpu_writel(g, gr_ppcs_wwdx_map_gpc_map_r(mapreg_num), map);
 nvgpu_writel(g, gr_rstr2d_gpc_map_r(mapreg_num), map);

- base = base + GR_TPCS_INFO_FOR_MAPREGISTER;
+ base = nvgpu_safe_add_u32(base, GR_TPCS_INFO_FOR_MAPREGISTER);
 }

 nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
@@ -523,14 +544,14 @@ void gv11b_gr_init_rop_mapping(struct gk20a *g,
 tpc_cnt = nvgpu_gr_config_get_tpc_count(gr_config);
 nvgpu_writel(g, gr_ppcs_wwdx_map_table_cfg_coeff_r(i),
 gr_ppcs_wwdx_map_table_cfg_coeff_0_mod_value_f(
- (BIT32(j) % tpc_cnt)) |
+ (BIT32(j) % tpc_cnt)) |
 gr_ppcs_wwdx_map_table_cfg_coeff_1_mod_value_f(
- (BIT32(j + 1U) % tpc_cnt)) |
+ (BIT32(nvgpu_safe_add_u32(j, 1U)) % tpc_cnt)) |
 gr_ppcs_wwdx_map_table_cfg_coeff_2_mod_value_f(
- (BIT32(j + 2U) % tpc_cnt)) |
+ (BIT32(nvgpu_safe_add_u32(j, 2U)) % tpc_cnt)) |
 gr_ppcs_wwdx_map_table_cfg_coeff_3_mod_value_f(
- (BIT32(j + 3U) % tpc_cnt)));
- j = j + 4U;
+ (BIT32(nvgpu_safe_add_u32(j, 3U)) % tpc_cnt)));
+ j = nvgpu_safe_add_u32(j, 4U);
 }

 nvgpu_writel(g, gr_rstr2d_map_table_cfg_r(),
@@ -713,11 +734,17 @@ u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
 {
 u32 size;

- size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) *
- gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
+ size = nvgpu_safe_mult_u32(
+ g->ops.gr.init.get_attrib_cb_size(g, tpc_count),
+ nvgpu_safe_mult_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(),
+ max_tpc));

- size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) *
- gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc;
+ size += nvgpu_safe_mult_u32(
+ g->ops.gr.init.get_alpha_cb_size(g, tpc_count),
+ nvgpu_safe_mult_u32(
+ gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v(),
+ max_tpc));

 size = ALIGN(size, 128);

@@ -737,8 +764,8 @@ void gv11b_gr_init_commit_global_attrib_cb(struct gk20a *g,
 addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v();

 if (nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va != 0ULL) {
- attrBufferSize =
- U32(nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size);
+ attrBufferSize = nvgpu_safe_cast_u64_to_u32(
+ nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size);
 } else {
 attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g,
 tpc_count, max_tpc);
@@ -823,15 +850,18 @@ int gv11b_gr_init_load_sw_veid_bundle(struct gk20a *g,

 u32 gv11b_gr_init_get_ctx_spill_size(struct gk20a *g)
 {
- return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
- gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+ return nvgpu_safe_mult_u32(
+ gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(),
+ gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v());
 }

 u32 gv11b_gr_init_get_ctx_betacb_size(struct gk20a *g)
 {
- return g->ops.gr.init.get_attrib_cb_default_size(g) +
- (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
- gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+ return nvgpu_safe_add_u32(
+ g->ops.gr.init.get_attrib_cb_default_size(g),
+ nvgpu_safe_sub_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }

 void gv11b_gr_init_commit_ctxsw_spill(struct gk20a *g,
@@ -927,7 +957,7 @@ u32 gv11b_gr_init_get_patch_slots(struct gk20a *g,
 /*
 * Increase the size to accommodate for additional TPC partition update
 */
- size += 2U * slot_size;
+ size += nvgpu_safe_mult_u32(2U, slot_size);

 return size;
 }
@@ -23,6 +23,7 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/io.h>
+ #include <nvgpu/safe_ops.h>
 #include <nvgpu/netlist.h>
 #include <nvgpu/gr/ctx.h>

@@ -33,9 +34,11 @@

 u32 tu104_gr_init_get_rtv_cb_size(struct gk20a *g)
 {
- return (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
- gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()) *
- gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
+ return nvgpu_safe_mult_u32(
+ nvgpu_safe_add_u32(
+ gr_scc_rm_rtv_cb_size_div_256b_default_f(),
+ gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()),
+ gr_scc_bundle_cb_size_div_256b_byte_granularity_v());
 }

 static void tu104_gr_init_patch_rtv_cb(struct gk20a *g,
@@ -56,7 +59,8 @@ static void tu104_gr_init_patch_rtv_cb(struct gk20a *g,
 void tu104_gr_init_commit_rtv_cb(struct gk20a *g, u64 addr,
 struct nvgpu_gr_ctx *gr_ctx, bool patch)
 {
- u32 size = (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+ u32 size = nvgpu_safe_add_u32(
+ gr_scc_rm_rtv_cb_size_div_256b_default_f(),
 gr_scc_rm_rtv_cb_size_div_256b_db_adder_f());

 addr = addr >> gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f();
@@ -75,9 +79,10 @@ void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g,

 nvgpu_log_fn(g, " ");

- rtv_cb_size =
- (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
- gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
+ rtv_cb_size = nvgpu_safe_add_u32(
+ nvgpu_safe_add_u32(
+ gr_scc_rm_rtv_cb_size_div_256b_default_f(),
+ gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()),
 gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f());
 gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f();

@@ -168,22 +173,28 @@ int tu104_gr_init_load_sw_bundle64(struct gk20a *g,

 u32 tu104_gr_init_get_ctx_spill_size(struct gk20a *g)
 {
- return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
- gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+ return nvgpu_safe_mult_u32(
+ gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(),
+ gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v());
 }

 u32 tu104_gr_init_get_ctx_betacb_size(struct gk20a *g)
 {
- return g->ops.gr.init.get_attrib_cb_default_size(g) +
- (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
- gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+ return nvgpu_safe_add_u32(
+ g->ops.gr.init.get_attrib_cb_default_size(g),
+ nvgpu_safe_sub_u32(
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(),
+ gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()));
 }

 u32 tu104_gr_init_get_gfxp_rtv_cb_size(struct gk20a *g)
 {
- return (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
- gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
- gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()) *
- gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v();
+ return nvgpu_safe_mult_u32(
+ nvgpu_safe_add_u32(
+ nvgpu_safe_add_u32(
+ gr_scc_rm_rtv_cb_size_div_256b_default_f(),
+ gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()),
+ gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()),
+ gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v());
 }
