diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 6155887af..a3afc6c02 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -168,6 +168,7 @@ nvgpu-y += \ hal/gr/fecs_trace/fecs_trace_gv11b.o \ hal/gr/init/gr_init_gm20b.o \ hal/gr/init/gr_init_gp10b.o \ + hal/gr/init/gr_init_gv100.o \ hal/gr/init/gr_init_gv11b.o \ hal/gr/init/gr_init_tu104.o \ hal/gr/hwpm_map/hwpm_map_gv100.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index f8773981d..e34d7927c 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -322,6 +322,7 @@ srcs += common/sim.c \ hal/gr/fecs_trace/fecs_trace_gv11b.c \ hal/gr/init/gr_init_gm20b.c \ hal/gr/init/gr_init_gp10b.c \ + hal/gr/init/gr_init_gv100.c \ hal/gr/init/gr_init_gv11b.c \ hal/gr/init/gr_init_tu104.c \ hal/gr/hwpm_map/hwpm_map_gv100.c \ diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index e1b31e16b..99212341f 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -33,6 +33,8 @@ #include "hal/fb/fb_gm20b.h" #include "hal/fb/fb_gp10b.h" #include "hal/gr/fecs_trace/fecs_trace_gm20b.h" +#include "hal/gr/init/gr_init_gm20b.h" +#include "hal/gr/init/gr_init_gp10b.h" #include "common/netlist/netlist_gp10b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" @@ -112,10 +114,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .gr = { .get_patch_slots = gr_gk20a_get_patch_slots, .init_gpc_mmu = NULL, - .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults, - .cb_size_default = gr_gp10b_cb_size_default, - .calc_global_ctx_buffer_size = - gr_gp10b_calc_global_ctx_buffer_size, .commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb, .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, @@ -326,7 +324,27 @@ static const struct gpu_ops vgpu_gp10b_ops = { #endif /* CONFIG_GK20A_CTXSW_TRACE */ .init = { .fs_state = vgpu_gr_init_fs_state, - } + .get_bundle_cb_default_size = + gm20b_gr_init_get_bundle_cb_default_size, + .get_min_gpm_fifo_depth = + gm20b_gr_init_get_min_gpm_fifo_depth, + .get_bundle_cb_token_limit = + gm20b_gr_init_get_bundle_cb_token_limit, + .get_attrib_cb_default_size = + gp10b_gr_init_get_attrib_cb_default_size, + .get_alpha_cb_default_size = + gp10b_gr_init_get_alpha_cb_default_size, + .get_attrib_cb_gfxp_default_size = + gp10b_gr_init_get_attrib_cb_gfxp_default_size, + .get_attrib_cb_gfxp_size = + gp10b_gr_init_get_attrib_cb_gfxp_size, + .get_attrib_cb_size = + gp10b_gr_init_get_attrib_cb_size, + .get_alpha_cb_size = + gp10b_gr_init_get_alpha_cb_size, + .get_global_attr_cb_size = + gp10b_gr_init_get_global_attr_cb_size, + }, }, .perf = { .get_pmm_per_chiplet_offset = diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index cce121bf3..6b20e675b 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -165,7 +165,9 @@ int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g) nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, NVGPU_GR_GLOBAL_CTX_PAGEPOOL, size); - size = g->ops.gr.calc_global_ctx_buffer_size(g); + size = g->ops.gr.init.get_global_attr_cb_size(g, + nvgpu_gr_config_get_tpc_count(g->gr.config), + nvgpu_gr_config_get_max_tpc_count(g->gr.config)); nvgpu_log_info(g, "attr_buffer_size : %u", size); nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, @@ -416,9 +418,6 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) } } - g->ops.gr.bundle_cb_defaults(g); - g->ops.gr.cb_size_default(g); - g->ops.gr.calc_global_ctx_buffer_size(g); err = g->ops.gr.init.fs_state(g); if (err) { goto cleanup; diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 9214e9e85..6c6c1f464 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -40,6 +40,7 @@ #include "hal/fb/fb_gm20b.h" #include "hal/fb/fb_gp10b.h" #include "hal/fb/fb_gv11b.h" +#include "hal/gr/init/gr_init_gv11b.h" #include "common/netlist/netlist_gv11b.h" #include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" @@ -131,10 +132,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { }, .gr = { .init_gpc_mmu = NULL, - .bundle_cb_defaults = gr_gv11b_bundle_cb_defaults, - .cb_size_default = gr_gv11b_cb_size_default, - .calc_global_ctx_buffer_size = - gr_gv11b_calc_global_ctx_buffer_size, .commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb, .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, @@ -375,7 +372,27 @@ static const struct gpu_ops vgpu_gv11b_ops = { #endif /* CONFIG_GK20A_CTXSW_TRACE */ .init = { .fs_state = vgpu_gr_init_fs_state, - } + .get_bundle_cb_default_size = + gv11b_gr_init_get_bundle_cb_default_size, + .get_min_gpm_fifo_depth = + gv11b_gr_init_get_min_gpm_fifo_depth, + .get_bundle_cb_token_limit = + gv11b_gr_init_get_bundle_cb_token_limit, + .get_attrib_cb_default_size = + gv11b_gr_init_get_attrib_cb_default_size, + .get_alpha_cb_default_size = + gv11b_gr_init_get_alpha_cb_default_size, + .get_attrib_cb_gfxp_default_size = + gv11b_gr_init_get_attrib_cb_gfxp_default_size, + .get_attrib_cb_gfxp_size = + gv11b_gr_init_get_attrib_cb_gfxp_size, + .get_attrib_cb_size = + gv11b_gr_init_get_attrib_cb_size, + .get_alpha_cb_size = + gv11b_gr_init_get_alpha_cb_size, + .get_global_attr_cb_size = + gv11b_gr_init_get_global_attr_cb_size, + }, }, .perf = { .get_pmm_per_chiplet_offset = diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 272f637cc..a73230fdc 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -709,7 +709,7 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, NVGPU_GR_CTX_CIRCULAR_VA) >> U64(gr_scc_bundle_cb_base_addr_39_8_align_bits_v()); - size = gr->bundle_cb_default_size; + size = g->ops.gr.init.get_bundle_cb_default_size(g); nvgpu_log_info(g, "bundle cb addr : 0x%016llx, size : %d", addr, size); @@ -1676,7 +1676,9 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR, size); - size = g->ops.gr.calc_global_ctx_buffer_size(g); + size = g->ops.gr.init.get_global_attr_cb_size(g, + nvgpu_gr_config_get_tpc_count(g->gr.config), + nvgpu_gr_config_get_max_tpc_count(g->gr.config)); nvgpu_log_info(g, "attr_buffer_size : %u", size); nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, @@ -1993,19 +1995,22 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) nvgpu_log_info(g, "fbps: %d", gr->num_fbps); nvgpu_log_info(g, "max_fbps_count: %d", gr->max_fbps_count); - g->ops.gr.bundle_cb_defaults(g); - g->ops.gr.cb_size_default(g); - g->ops.gr.calc_global_ctx_buffer_size(g); - nvgpu_log_info(g, "bundle_cb_default_size: %d", - gr->bundle_cb_default_size); - nvgpu_log_info(g, "min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth); - nvgpu_log_info(g, "bundle_cb_token_limit: %d", gr->bundle_cb_token_limit); + g->ops.gr.init.get_bundle_cb_default_size(g)); + nvgpu_log_info(g, "min_gpm_fifo_depth: %d", + g->ops.gr.init.get_min_gpm_fifo_depth(g)); + nvgpu_log_info(g, "bundle_cb_token_limit: %d", + g->ops.gr.init.get_bundle_cb_token_limit(g)); nvgpu_log_info(g, "attrib_cb_default_size: %d", - gr->attrib_cb_default_size); - nvgpu_log_info(g, "attrib_cb_size: %d", gr->attrib_cb_size); - nvgpu_log_info(g, "alpha_cb_default_size: %d", gr->alpha_cb_default_size); - nvgpu_log_info(g, "alpha_cb_size: %d", gr->alpha_cb_size); + g->ops.gr.init.get_attrib_cb_default_size(g)); + nvgpu_log_info(g, "attrib_cb_size: %d", + g->ops.gr.init.get_attrib_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config))); + nvgpu_log_info(g, "alpha_cb_default_size: %d", + g->ops.gr.init.get_alpha_cb_default_size(g)); + nvgpu_log_info(g, "alpha_cb_size: %d", + g->ops.gr.init.get_alpha_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config))); return 0; @@ -6267,9 +6272,7 @@ u32 gr_gk20a_gpccs_falcon_base_addr(void) u32 gk20a_gr_get_global_ctx_cb_buffer_size(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; - - return gr->bundle_cb_default_size * + return g->ops.gr.init.get_bundle_cb_default_size(g) * gr_scc_bundle_cb_size_div_256b_byte_granularity_v(); } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index eaf6cd954..72db5676e 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -201,16 +201,6 @@ struct gr_gk20a { u32 num_fbps; u32 max_fbps_count; - u32 bundle_cb_default_size; - u32 min_gpm_fifo_depth; - u32 bundle_cb_token_limit; - u32 attrib_cb_default_size; - u32 attrib_cb_size; - u32 attrib_cb_gfxp_default_size; - u32 attrib_cb_gfxp_size; - u32 alpha_cb_default_size; - u32 alpha_cb_size; - u32 gfxp_wfi_timeout_count; bool gfxp_wfi_timeout_unit_usec; diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index d56392e62..dfa86e6ef 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -76,51 +76,6 @@ void gr_gm20b_init_gpc_mmu(struct gk20a *g) gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(), g->ltc_count); } -void gr_gm20b_bundle_cb_defaults(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - - gr->bundle_cb_default_size = - gr_scc_bundle_cb_size_div_256b__prod_v(); - gr->min_gpm_fifo_depth = - gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); - gr->bundle_cb_token_limit = - gr_pd_ab_dist_cfg2_token_limit_init_v(); -} - -void gr_gm20b_cb_size_default(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - - if (gr->attrib_cb_default_size == 0U) { - gr->attrib_cb_default_size = - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); - } - gr->alpha_cb_default_size = - gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); -} - -u32 gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - u32 size; - - gr->attrib_cb_size = gr->attrib_cb_default_size - + (gr->attrib_cb_default_size >> 1); - gr->alpha_cb_size = gr->alpha_cb_default_size - + (gr->alpha_cb_default_size >> 1); - - size = gr->attrib_cb_size * - gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - nvgpu_gr_config_get_max_tpc_count(gr->config); - - size += gr->alpha_cb_size * - gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * - nvgpu_gr_config_get_max_tpc_count(gr->config); - - return size; -} - void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch) @@ -143,6 +98,7 @@ void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, u64 addr, u64 size, bool patch) { u32 data; + u32 bundle_cb_token_limit = g->ops.gr.init.get_bundle_cb_token_limit(g); nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); @@ -159,17 +115,17 @@ void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); /* data for state_limit */ - data = (g->gr.bundle_cb_default_size * + data = (g->ops.gr.init.get_bundle_cb_default_size(g) * gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); - data = min_t(u32, data, g->gr.min_gpm_fifo_depth); + data = min_t(u32, data, g->ops.gr.init.get_min_gpm_fifo_depth(g)); nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d", - g->gr.bundle_cb_token_limit, data); + bundle_cb_token_limit, data); nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), - gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | + gr_pd_ab_dist_cfg2_token_limit_f(bundle_cb_token_limit) | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); } @@ -183,6 +139,12 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, u32 pd_ab_max_output; u32 gpc_index, ppc_index; u32 cbm_cfg_size1, cbm_cfg_size2; + u32 attrib_cb_default_size = g->ops.gr.init.get_attrib_cb_default_size(g); + u32 alpha_cb_default_size = g->ops.gr.init.get_alpha_cb_default_size(g); + u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config)); + u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config)); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); u32 num_pes_per_gpc = nvgpu_get_litter_value(g, @@ -191,11 +153,11 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, nvgpu_log_fn(g, " "); nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_r(), - gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | - gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), + gr_ds_tga_constraintlogic_beta_cbsize_f(attrib_cb_default_size) | + gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_default_size), patch); - pd_ab_max_output = (gr->alpha_cb_default_size * + pd_ab_max_output = (alpha_cb_default_size * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) / gr_pd_ab_dist_cfg1_max_output_granularity_v(); @@ -204,7 +166,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); alpha_offset_in_chunk = attrib_offset_in_chunk + - nvgpu_gr_config_get_tpc_count(gr->config) * gr->attrib_cb_size; + nvgpu_gr_config_get_tpc_count(gr->config) * attrib_cb_size; for (gpc_index = 0; gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); @@ -215,10 +177,10 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_index); ppc_index++) { - cbm_cfg_size1 = gr->attrib_cb_default_size * + cbm_cfg_size1 = attrib_cb_default_size * nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index); - cbm_cfg_size2 = gr->alpha_cb_default_size * + cbm_cfg_size2 = alpha_cb_default_size * nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index); @@ -232,7 +194,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, ppc_in_gpc_stride * ppc_index, attrib_offset_in_chunk, patch); - attrib_offset_in_chunk += gr->attrib_cb_size * + attrib_offset_in_chunk += attrib_cb_size * nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index); @@ -246,7 +208,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, ppc_in_gpc_stride * ppc_index, alpha_offset_in_chunk, patch); - alpha_offset_in_chunk += gr->alpha_cb_size * + alpha_offset_in_chunk += alpha_cb_size * nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index); @@ -335,6 +297,8 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) u32 gpc_index, ppc_index, stride, val; u32 pd_ab_max_output; u32 alpha_cb_size = data * 4U; + u32 alpha_cb_size_max = g->ops.gr.init.get_alpha_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config)); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); @@ -342,8 +306,8 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) return; */ - if (alpha_cb_size > gr->alpha_cb_size) { - alpha_cb_size = gr->alpha_cb_size; + if (alpha_cb_size > alpha_cb_size_max) { + alpha_cb_size = alpha_cb_size_max; } gk20a_writel(g, gr_ds_tga_constraintlogic_r(), @@ -391,11 +355,13 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) u32 cb_size = data * 4U; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); + u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config)); nvgpu_log_fn(g, " "); - if (cb_size > gr->attrib_cb_size) { - cb_size = gr->attrib_cb_size; + if (cb_size > attrib_cb_size) { + cb_size = attrib_cb_size; } gk20a_writel(g, gr_ds_tga_constraintlogic_r(), diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index 11ca094d7..2f18b6e18 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h @@ -50,9 +50,6 @@ int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data); void gm20a_gr_disable_rd_coalesce(struct gk20a *g); void gr_gm20b_init_gpc_mmu(struct gk20a *g); -void gr_gm20b_bundle_cb_defaults(struct gk20a *g); -void gr_gm20b_cb_size_default(struct gk20a *g); -u32 gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g); void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, u64 addr, u64 size, bool patch); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 8f3fff426..a98acd7be 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -231,10 +231,6 @@ static const struct gpu_ops gm20b_ops = { .gr = { .get_patch_slots = gr_gk20a_get_patch_slots, .init_gpc_mmu = gr_gm20b_init_gpc_mmu, - .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults, - .cb_size_default = gr_gm20b_cb_size_default, - .calc_global_ctx_buffer_size = - gr_gm20b_calc_global_ctx_buffer_size, .commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb, .commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb, .commit_global_cb_manager = gr_gm20b_commit_global_cb_manager, @@ -441,6 +437,22 @@ static const struct gpu_ops gm20b_ops = { .load_method_init = gm20b_gr_init_load_method_init, .commit_global_timeslice = gm20b_gr_init_commit_global_timeslice, + .get_bundle_cb_default_size = + gm20b_gr_init_get_bundle_cb_default_size, + .get_min_gpm_fifo_depth = + gm20b_gr_init_get_min_gpm_fifo_depth, + .get_bundle_cb_token_limit = + gm20b_gr_init_get_bundle_cb_token_limit, + .get_attrib_cb_default_size = + gm20b_gr_init_get_attrib_cb_default_size, + .get_alpha_cb_default_size = + gm20b_gr_init_get_alpha_cb_default_size, + .get_attrib_cb_size = + gm20b_gr_init_get_attrib_cb_size, + .get_alpha_cb_size = + gm20b_gr_init_get_alpha_cb_size, + .get_global_attr_cb_size = + gm20b_gr_init_get_global_attr_cb_size, }, }, .fb = { diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c index 5032c022f..7d7be7710 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c @@ -127,21 +127,6 @@ fail: return -EINVAL; } -void gr_gp106_cb_size_default(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - - if (gr->attrib_cb_default_size == 0U) { - gr->attrib_cb_default_size = 0x800; - } - gr->alpha_cb_default_size = - gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); - gr->attrib_cb_gfxp_default_size = - gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); - gr->attrib_cb_gfxp_size = - gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); -} - int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, @@ -178,10 +163,12 @@ int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); u32 pagepool_size = g->ops.gr.pagepool_default_size(g) * gr_scc_pagepool_total_pages_byte_granularity_v(); - u32 betacb_size = g->gr.attrib_cb_default_size + + u32 betacb_size = g->ops.gr.init.get_attrib_cb_default_size(g) + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); - u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * + u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g, + nvgpu_gr_config_get_tpc_count(g->gr.config)); + u32 attrib_cb_size = (betacb_size + alpha_cb_size) * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * nvgpu_gr_config_get_max_tpc_count(g->gr.config); attrib_cb_size = ALIGN(attrib_cb_size, 128); diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.h b/drivers/gpu/nvgpu/gp106/gr_gp106.h index a741ea431..afcdc7c82 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.h +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.h @@ -34,7 +34,6 @@ bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num); u32 gr_gp106_pagepool_default_size(struct gk20a *g); int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data); -void gr_gp106_cb_size_default(struct gk20a *g); int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class, diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 7ea3d87c2..4e30e134b 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -403,6 +403,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, u32 temp, temp2; u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate; u32 attrib_size_in_chunk, cb_attrib_cache_size_init; + u32 attrib_cb_default_size = g->ops.gr.init.get_attrib_cb_default_size(g); + u32 alpha_cb_default_size = g->ops.gr.init.get_alpha_cb_default_size(g); + u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config)); + u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config)); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); @@ -410,19 +416,21 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, nvgpu_log_fn(g, " "); if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { - attrib_size_in_chunk = gr->attrib_cb_gfxp_size; - cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size; + attrib_size_in_chunk = + g->ops.gr.init.get_attrib_cb_gfxp_size(g); + cb_attrib_cache_size_init = + g->ops.gr.init.get_attrib_cb_gfxp_default_size(g); } else { - attrib_size_in_chunk = gr->attrib_cb_size; - cb_attrib_cache_size_init = gr->attrib_cb_default_size; + attrib_size_in_chunk = attrib_cb_size; + cb_attrib_cache_size_init = attrib_cb_default_size; } nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(), - gr->attrib_cb_default_size, patch); + attrib_cb_default_size, patch); nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(), - gr->alpha_cb_default_size, patch); + alpha_cb_default_size, patch); - pd_ab_max_output = (gr->alpha_cb_default_size * + pd_ab_max_output = (alpha_cb_default_size * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) / gr_pd_ab_dist_cfg1_max_output_granularity_v(); @@ -431,7 +439,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); attrib_offset_in_chunk = alpha_offset_in_chunk + - nvgpu_gr_config_get_tpc_count(gr->config) * gr->alpha_cb_size; + nvgpu_gr_config_get_tpc_count(gr->config) * alpha_cb_size; for (gpc_index = 0; gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); @@ -444,10 +452,10 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, cbm_cfg_size_beta = cb_attrib_cache_size_init * nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index); - cbm_cfg_size_alpha = gr->alpha_cb_default_size * + cbm_cfg_size_alpha = alpha_cb_default_size * nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index); - cbm_cfg_size_steadystate = gr->attrib_cb_default_size * + cbm_cfg_size_steadystate = attrib_cb_default_size * nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index); @@ -481,7 +489,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, ppc_in_gpc_stride * ppc_index, alpha_offset_in_chunk, patch); - alpha_offset_in_chunk += gr->alpha_cb_size * + alpha_offset_in_chunk += alpha_cb_size * nvgpu_gr_config_get_pes_tpc_count(gr->config, gpc_index, ppc_index); @@ -520,34 +528,6 @@ u32 gr_gp10b_pagepool_default_size(struct gk20a *g) return gr_scc_pagepool_total_pages_hwmax_value_v(); } -u32 gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - u32 size; - - gr->attrib_cb_size = gr->attrib_cb_default_size; - gr->alpha_cb_size = gr->alpha_cb_default_size; - - gr->attrib_cb_size = min(gr->attrib_cb_size, - gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / - nvgpu_gr_config_get_tpc_count(gr->config)); - gr->alpha_cb_size = min(gr->alpha_cb_size, - gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / - nvgpu_gr_config_get_tpc_count(gr->config)); - - size = gr->attrib_cb_size * - gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - nvgpu_gr_config_get_max_tpc_count(gr->config); - - size += gr->alpha_cb_size * - gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * - nvgpu_gr_config_get_max_tpc_count(gr->config); - - size = ALIGN(size, 128); - - return size; -} - static void gr_gp10b_set_go_idle_timeout(struct gk20a *g, u32 data) { gk20a_writel(g, gr_fe_go_idle_timeout_r(), data); @@ -670,38 +650,21 @@ fail: return -EINVAL; } -void gr_gp10b_cb_size_default(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - - if (gr->attrib_cb_default_size == 0U) { - gr->attrib_cb_default_size = 0x800; - } - gr->alpha_cb_default_size = - gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); - gr->attrib_cb_gfxp_default_size = - gr->attrib_cb_default_size + - (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); - gr->attrib_cb_gfxp_size = - gr->attrib_cb_default_size + - (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); -} - void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; u32 pd_ab_max_output; u32 alpha_cb_size = data * 4U; + u32 alpha_cb_size_max = g->ops.gr.init.get_alpha_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config)); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); nvgpu_log_fn(g, " "); - if (alpha_cb_size > gr->alpha_cb_size) { - alpha_cb_size = gr->alpha_cb_size; + if (alpha_cb_size > alpha_cb_size_max) { + alpha_cb_size = alpha_cb_size_max; } gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(), @@ -749,11 +712,13 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) u32 cb_size_steady = data * 4U, cb_size; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); + u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config)); nvgpu_log_fn(g, " "); - if (cb_size_steady > gr->attrib_cb_size) { - cb_size_steady = gr->attrib_cb_size; + if (cb_size_steady > attrib_cb_size) { + cb_size_steady = attrib_cb_size; } if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) != gk20a_readl(g, @@ -1216,7 +1181,9 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) { attrBufferSize = U32(gr_ctx->betacb_ctxsw_buffer.size); } else { - attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g); + attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g, + nvgpu_gr_config_get_tpc_count(g->gr.config), + nvgpu_gr_config_get_max_tpc_count(g->gr.config)); } attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); @@ -1241,6 +1208,7 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, u64 addr, u64 size, bool patch) { u32 data; + u32 bundle_cb_token_limit = g->ops.gr.init.get_bundle_cb_token_limit(g); nvgpu_assert(u64_hi32(addr) == 0U); nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(), @@ -1259,17 +1227,17 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); /* data for state_limit */ - data = (g->gr.bundle_cb_default_size * + data = (g->ops.gr.init.get_bundle_cb_default_size(g) * gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); - data = min_t(u32, data, g->gr.min_gpm_fifo_depth); + data = min_t(u32, data, g->ops.gr.init.get_min_gpm_fifo_depth(g)); nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d", - g->gr.bundle_cb_token_limit, data); + bundle_cb_token_limit, data); nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(), - gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | + gr_pd_ab_dist_cfg2_token_limit_f(bundle_cb_token_limit) | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); } @@ -1955,24 +1923,31 @@ unsigned long gr_gp10b_get_max_gfxp_wfi_timeout_count(struct gk20a *g) return (100UL * 1000UL * 1000UL); } -u32 gp10b_gr_get_ctx_spill_size(struct gk20a *g) { +u32 gp10b_gr_get_ctx_spill_size(struct gk20a *g) +{ return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() * gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); } -u32 gp10b_gr_get_ctx_pagepool_size(struct gk20a *g) { +u32 gp10b_gr_get_ctx_pagepool_size(struct gk20a *g) +{ return g->ops.gr.pagepool_default_size(g) * gr_scc_pagepool_total_pages_byte_granularity_v(); } -u32 gp10b_gr_get_ctx_betacb_size(struct gk20a *g) { - return g->gr.attrib_cb_default_size + +u32 gp10b_gr_get_ctx_betacb_size(struct gk20a *g) +{ + return g->ops.gr.init.get_attrib_cb_default_size(g) + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); } -u32 gp10b_gr_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size) { - return (betacb_size + g->gr.alpha_cb_size) * +u32 gp10b_gr_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size) +{ + u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g, + nvgpu_gr_config_get_tpc_count(g->gr.config)); + + return (betacb_size + alpha_cb_size) * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * nvgpu_gr_config_get_max_tpc_count(g->gr.config); } diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h index d6e4e10dd..1e3984c85 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h @@ -76,12 +76,10 @@ void gr_gp10b_commit_global_pagepool(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, u32 size, bool patch); u32 gr_gp10b_pagepool_default_size(struct gk20a *g); -u32 gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g); void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data); void gr_gp10b_set_bes_crop_debug4(struct gk20a *g, u32 data); int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data); -void gr_gp10b_cb_size_default(struct gk20a *g); void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data); void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data); int gr_gp10b_init_ctx_state(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index f291f5f6c..97e323c17 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -254,10 +254,6 @@ static const struct gpu_ops gp10b_ops = { .gr = { .get_patch_slots = gr_gk20a_get_patch_slots, .init_gpc_mmu = gr_gm20b_init_gpc_mmu, - .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults, - .cb_size_default = gr_gp10b_cb_size_default, - .calc_global_ctx_buffer_size = - gr_gp10b_calc_global_ctx_buffer_size, .commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb, .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, @@ -513,6 +509,26 @@ static const struct gpu_ops gp10b_ops = { .load_method_init = gm20b_gr_init_load_method_init, .commit_global_timeslice = gm20b_gr_init_commit_global_timeslice, + .get_bundle_cb_default_size = + gm20b_gr_init_get_bundle_cb_default_size, + .get_min_gpm_fifo_depth = + gm20b_gr_init_get_min_gpm_fifo_depth, + .get_bundle_cb_token_limit = + gm20b_gr_init_get_bundle_cb_token_limit, + .get_attrib_cb_default_size = + gp10b_gr_init_get_attrib_cb_default_size, + .get_alpha_cb_default_size = + gp10b_gr_init_get_alpha_cb_default_size, + .get_attrib_cb_gfxp_default_size = + gp10b_gr_init_get_attrib_cb_gfxp_default_size, + .get_attrib_cb_gfxp_size = + gp10b_gr_init_get_attrib_cb_gfxp_size, + .get_attrib_cb_size = + gp10b_gr_init_get_attrib_cb_size, + .get_alpha_cb_size = + gp10b_gr_init_get_alpha_cb_size, + .get_global_attr_cb_size = + gp10b_gr_init_get_global_attr_cb_size, }, }, .fb = { diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 7faf793ec..53a65839a 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -191,30 +191,6 @@ free_resources: return err; } -void gr_gv100_bundle_cb_defaults(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - - gr->bundle_cb_default_size = - gr_scc_bundle_cb_size_div_256b__prod_v(); - gr->min_gpm_fifo_depth = - gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); - gr->bundle_cb_token_limit = - gr_pd_ab_dist_cfg2_token_limit_init_v(); -} - -void gr_gv100_cb_size_default(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - - if (gr->attrib_cb_default_size == 0U) { - gr->attrib_cb_default_size = - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); - } - gr->alpha_cb_default_size = - gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); -} - void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { } diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index 964d92ad9..23d389576 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h @@ -25,8 +25,6 @@ #ifndef NVGPU_GR_GV100_H #define NVGPU_GR_GV100_H -void gr_gv100_bundle_cb_defaults(struct gk20a *g); -void gr_gv100_cb_size_default(struct gk20a *g); void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); int gr_gv100_init_sm_id_table(struct gk20a *g); void gr_gv100_program_sm_id_numbering(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index e10521d2f..6738910ae 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -58,6 +58,7 @@ #include "hal/gr/zbc/zbc_gv11b.h" #include "hal/gr/init/gr_init_gm20b.h" #include "hal/gr/init/gr_init_gp10b.h" +#include "hal/gr/init/gr_init_gv100.h" #include "hal/gr/init/gr_init_gv11b.h" #include "hal/gr/hwpm_map/hwpm_map_gv100.h" #include "hal/falcon/falcon_gk20a.h" @@ -361,10 +362,6 @@ static const struct gpu_ops gv100_ops = { .gr = { .get_patch_slots = gr_gv100_get_patch_slots, .init_gpc_mmu = gr_gv11b_init_gpc_mmu, - .bundle_cb_defaults = gr_gv100_bundle_cb_defaults, - .cb_size_default = gr_gv100_cb_size_default, - .calc_global_ctx_buffer_size = - gr_gv11b_calc_global_ctx_buffer_size, .commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb, .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, @@ -651,6 +648,26 @@ static const struct gpu_ops gv100_ops = { .load_method_init = gm20b_gr_init_load_method_init, .commit_global_timeslice = gv11b_gr_init_commit_global_timeslice, + .get_bundle_cb_default_size = + gv100_gr_init_get_bundle_cb_default_size, + .get_min_gpm_fifo_depth = + gv100_gr_init_get_min_gpm_fifo_depth, + .get_bundle_cb_token_limit = + gv100_gr_init_get_bundle_cb_token_limit, + .get_attrib_cb_default_size = + gv100_gr_init_get_attrib_cb_default_size, + .get_alpha_cb_default_size = + gv100_gr_init_get_alpha_cb_default_size, + .get_attrib_cb_gfxp_default_size = + gv100_gr_init_get_attrib_cb_gfxp_default_size, + .get_attrib_cb_gfxp_size = + gv100_gr_init_get_attrib_cb_gfxp_size, + .get_attrib_cb_size = + gv11b_gr_init_get_attrib_cb_size, + .get_alpha_cb_size = + gv11b_gr_init_get_alpha_cb_size, + .get_global_attr_cb_size = + gv11b_gr_init_get_global_attr_cb_size, }, }, .fb = { diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 6acdc4314..7da8d3741 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1158,34 +1158,6 @@ u32 gr_gv11b_pagepool_default_size(struct gk20a *g) return gr_scc_pagepool_total_pages_hwmax_value_v(); } -u32 gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - u32 size; - - gr->attrib_cb_size = gr->attrib_cb_default_size; - gr->alpha_cb_size = gr->alpha_cb_default_size; - - gr->attrib_cb_size = min(gr->attrib_cb_size, - gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / - nvgpu_gr_config_get_tpc_count(gr->config)); - gr->alpha_cb_size = min(gr->alpha_cb_size, - gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / - nvgpu_gr_config_get_tpc_count(gr->config)); - - size = gr->attrib_cb_size * - gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * - nvgpu_gr_config_get_max_tpc_count(gr->config); - - size += gr->alpha_cb_size * - gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * - nvgpu_gr_config_get_max_tpc_count(gr->config); - - size = ALIGN(size, 128); - - return size; -} - void gr_gv11b_set_go_idle_timeout(struct gk20a *g, u32 data) { gk20a_writel(g, gr_fe_go_idle_timeout_r(), data); @@ -1355,45 +1327,19 @@ fail: return -EINVAL; } -void gr_gv11b_bundle_cb_defaults(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - - gr->bundle_cb_default_size = - gr_scc_bundle_cb_size_div_256b__prod_v(); - gr->min_gpm_fifo_depth = - gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); - gr->bundle_cb_token_limit = - gr_pd_ab_dist_cfg2_token_limit_init_v(); -} - -void gr_gv11b_cb_size_default(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - - if (gr->attrib_cb_default_size == 0U) { - gr->attrib_cb_default_size = - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); - } - gr->alpha_cb_default_size = - gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); - gr->attrib_cb_gfxp_default_size = - gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); - gr->attrib_cb_gfxp_size = - gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); -} - void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) { struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; u32 pd_ab_max_output; u32 alpha_cb_size = data * 4U; + u32 alpha_cb_size_max = g->ops.gr.init.get_alpha_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config)); nvgpu_log_fn(g, " "); - if (alpha_cb_size > gr->alpha_cb_size) { - alpha_cb_size = gr->alpha_cb_size; + if (alpha_cb_size > alpha_cb_size_max) { + alpha_cb_size = alpha_cb_size_max; } gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(), @@ -1438,11 +1384,13 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data) struct gr_gk20a *gr = &g->gr; u32 gpc_index, ppc_index, stride, val; u32 cb_size_steady = data * 4U, cb_size; + u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config)); nvgpu_log_fn(g, " "); - if (cb_size_steady > gr->attrib_cb_size) { - cb_size_steady = gr->attrib_cb_size; + if (cb_size_steady > attrib_cb_size) { + cb_size_steady = attrib_cb_size; } if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) != gk20a_readl(g, @@ -1834,7 +1782,9 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) { attrBufferSize = U32(gr_ctx->betacb_ctxsw_buffer.size); } else { - attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g); + attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g, + nvgpu_gr_config_get_tpc_count(g->gr.config), + nvgpu_gr_config_get_max_tpc_count(g->gr.config)); } attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); @@ -4441,24 +4391,31 @@ fail: return err; } -u32 gv11b_gr_get_ctx_spill_size(struct gk20a *g) { +u32 gv11b_gr_get_ctx_spill_size(struct gk20a *g) +{ return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() * gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); } -u32 gv11b_gr_get_ctx_pagepool_size(struct gk20a *g) { +u32 gv11b_gr_get_ctx_pagepool_size(struct gk20a *g) +{ return g->ops.gr.pagepool_default_size(g) * gr_scc_pagepool_total_pages_byte_granularity_v(); } -u32 gv11b_gr_get_ctx_betacb_size(struct gk20a *g) { - return g->gr.attrib_cb_default_size + +u32 gv11b_gr_get_ctx_betacb_size(struct gk20a *g) +{ + return g->ops.gr.init.get_attrib_cb_default_size(g) + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); } -u32 gv11b_gr_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size) { - return (betacb_size + g->gr.alpha_cb_size) * +u32 gv11b_gr_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size) +{ + u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g, + nvgpu_gr_config_get_tpc_count(g->gr.config)); + + return (betacb_size + alpha_cb_size) * gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * nvgpu_gr_config_get_max_tpc_count(g->gr.config); } diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index 70ca8fd06..b60e09720 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -97,11 +97,8 @@ void gr_gv11b_enable_gpc_exceptions(struct gk20a *g); int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, bool *post_event); u32 gr_gv11b_pagepool_default_size(struct gk20a *g); -u32 gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g); int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data); -void gr_gv11b_bundle_cb_defaults(struct gk20a *g); -void gr_gv11b_cb_size_default(struct gk20a *g); void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data); void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data); int gr_gv11b_dump_gr_status_regs(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index d86695e97..6b9b148eb 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -312,10 +312,6 @@ static const struct gpu_ops gv11b_ops = { .gr = { .get_patch_slots = gr_gv100_get_patch_slots, .init_gpc_mmu = gr_gv11b_init_gpc_mmu, - .bundle_cb_defaults = gr_gv11b_bundle_cb_defaults, - .cb_size_default = gr_gv11b_cb_size_default, - .calc_global_ctx_buffer_size = - gr_gv11b_calc_global_ctx_buffer_size, .commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb, .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, @@ -611,6 +607,26 @@ static const struct gpu_ops gv11b_ops = { .load_method_init = gm20b_gr_init_load_method_init, .commit_global_timeslice = gv11b_gr_init_commit_global_timeslice, + .get_bundle_cb_default_size = + gv11b_gr_init_get_bundle_cb_default_size, + .get_min_gpm_fifo_depth = + gv11b_gr_init_get_min_gpm_fifo_depth, + .get_bundle_cb_token_limit = + gv11b_gr_init_get_bundle_cb_token_limit, + .get_attrib_cb_default_size = + gv11b_gr_init_get_attrib_cb_default_size, + .get_alpha_cb_default_size = + gv11b_gr_init_get_alpha_cb_default_size, + .get_attrib_cb_gfxp_default_size = + gv11b_gr_init_get_attrib_cb_gfxp_default_size, + .get_attrib_cb_gfxp_size = + gv11b_gr_init_get_attrib_cb_gfxp_size, + .get_attrib_cb_size = + gv11b_gr_init_get_attrib_cb_size, + .get_alpha_cb_size = + gv11b_gr_init_get_alpha_cb_size, + .get_global_attr_cb_size = + gv11b_gr_init_get_global_attr_cb_size, }, }, .fb = { diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c index f7df754c7..7d9e15ce9 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c @@ -685,3 +685,55 @@ void gm20b_gr_init_commit_global_timeslice(struct gk20a *g) mpc_vtg_debug, false); nvgpu_gr_ctx_patch_write(g, NULL, gr_ds_debug_r(), ds_debug, false); } + +u32 gm20b_gr_init_get_bundle_cb_default_size(struct gk20a *g) +{ + return gr_scc_bundle_cb_size_div_256b__prod_v(); +} + +u32 gm20b_gr_init_get_min_gpm_fifo_depth(struct gk20a *g) +{ + return gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); +} + +u32 gm20b_gr_init_get_bundle_cb_token_limit(struct gk20a *g) +{ + return gr_pd_ab_dist_cfg2_token_limit_init_v(); +} + +u32 gm20b_gr_init_get_attrib_cb_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); +} + +u32 gm20b_gr_init_get_alpha_cb_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); +} + +u32 gm20b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count) +{ + return g->ops.gr.init.get_attrib_cb_default_size(g) + + (g->ops.gr.init.get_attrib_cb_default_size(g) >> 1); +} + +u32 gm20b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count) +{ + return g->ops.gr.init.get_alpha_cb_default_size(g) + + (g->ops.gr.init.get_alpha_cb_default_size(g) >> 1); +} + +u32 gm20b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count, + u32 max_tpc) +{ + u32 size; + + size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) * + gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc; + + size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) * + gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc; + + return size; +} + diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h index f77ebc694..f2f75d31c 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h @@ -56,4 +56,14 @@ void gm20b_gr_init_load_method_init(struct gk20a *g, struct netlist_av_list *sw_method_init); void gm20b_gr_init_commit_global_timeslice(struct gk20a *g); +u32 gm20b_gr_init_get_bundle_cb_default_size(struct gk20a *g); +u32 gm20b_gr_init_get_min_gpm_fifo_depth(struct gk20a *g); +u32 gm20b_gr_init_get_bundle_cb_token_limit(struct gk20a *g); +u32 gm20b_gr_init_get_attrib_cb_default_size(struct gk20a *g); +u32 gm20b_gr_init_get_alpha_cb_default_size(struct gk20a *g); +u32 gm20b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count); +u32 gm20b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count); +u32 gm20b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count, + u32 max_tpc); + #endif /* NVGPU_GR_INIT_GM20B_H */ diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c index 4819f7da6..abc7dcfa8 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c @@ -235,3 +235,55 @@ int gp10b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count, return 0; } +u32 gp10b_gr_init_get_attrib_cb_default_size(struct gk20a *g) +{ + return 0x800; +} + +u32 gp10b_gr_init_get_alpha_cb_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); +} + +u32 gp10b_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g) +{ + return g->ops.gr.init.get_attrib_cb_default_size(g) + + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); +} + +u32 gp10b_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g) +{ + return g->ops.gr.init.get_attrib_cb_default_size(g) + + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); +} + +u32 gp10b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count) +{ + return min(g->ops.gr.init.get_attrib_cb_default_size(g), + gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / tpc_count); +} + +u32 gp10b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count) +{ + return min(g->ops.gr.init.get_alpha_cb_default_size(g), + gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / tpc_count); +} + +u32 gp10b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count, + u32 max_tpc) +{ + u32 size; + + size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) * + gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc; + + size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) * + gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc; + + size = ALIGN(size, 128); + + return size; +} + diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h index 763cfe23f..4534dc28f 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h @@ -37,4 +37,13 @@ int gp10b_gr_init_fs_state(struct gk20a *g); int gp10b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count, bool gfxp_wfi_timeout_unit_usec); +u32 gp10b_gr_init_get_attrib_cb_default_size(struct gk20a *g); +u32 gp10b_gr_init_get_alpha_cb_default_size(struct gk20a *g); +u32 gp10b_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g); +u32 gp10b_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g); +u32 gp10b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count); +u32 gp10b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count); +u32 gp10b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count, + u32 max_tpc); + #endif /* NVGPU_GR_INIT_GP10B_H */ diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.c new file mode 100644 index 000000000..640044533 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include "gr_init_gv100.h" + +#include + +u32 gv100_gr_init_get_bundle_cb_default_size(struct gk20a *g) +{ + return gr_scc_bundle_cb_size_div_256b__prod_v(); +} + +u32 gv100_gr_init_get_min_gpm_fifo_depth(struct gk20a *g) +{ + return gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); +} + +u32 gv100_gr_init_get_bundle_cb_token_limit(struct gk20a *g) +{ + return gr_pd_ab_dist_cfg2_token_limit_init_v(); +} + +u32 gv100_gr_init_get_attrib_cb_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); +} + +u32 gv100_gr_init_get_alpha_cb_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); +} + +u32 gv100_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); +} + +u32 gv100_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); +} + diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.h new file mode 100644 index 000000000..de6a5ee3e --- /dev/null +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_INIT_GV100_H +#define NVGPU_GR_INIT_GV100_H + +#include + +struct gk20a; + +u32 gv100_gr_init_get_bundle_cb_default_size(struct gk20a *g); +u32 gv100_gr_init_get_min_gpm_fifo_depth(struct gk20a *g); +u32 gv100_gr_init_get_bundle_cb_token_limit(struct gk20a *g); +u32 gv100_gr_init_get_attrib_cb_default_size(struct gk20a *g); +u32 gv100_gr_init_get_alpha_cb_default_size(struct gk20a *g); +u32 gv100_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g); +u32 gv100_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g); + +#endif /* NVGPU_GR_INIT_GV100_H */ diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c index fb60a87e5..77e56295e 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c @@ -377,3 +377,67 @@ void gv11b_gr_init_commit_global_timeslice(struct gk20a *g) mpc_vtg_debug, false); nvgpu_gr_ctx_patch_write(g, NULL, gr_ds_debug_r(), ds_debug, false); } + +u32 gv11b_gr_init_get_bundle_cb_default_size(struct gk20a *g) +{ + return gr_scc_bundle_cb_size_div_256b__prod_v(); +} + +u32 gv11b_gr_init_get_min_gpm_fifo_depth(struct gk20a *g) +{ + return gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); +} + +u32 gv11b_gr_init_get_bundle_cb_token_limit(struct gk20a *g) +{ + return gr_pd_ab_dist_cfg2_token_limit_init_v(); +} + +u32 gv11b_gr_init_get_attrib_cb_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); +} + +u32 gv11b_gr_init_get_alpha_cb_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); +} + +u32 gv11b_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); +} + +u32 gv11b_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); +} + +u32 gv11b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count) +{ + return min(g->ops.gr.init.get_attrib_cb_default_size(g), + gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / tpc_count); +} + +u32 gv11b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count) +{ + return min(g->ops.gr.init.get_alpha_cb_default_size(g), + gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / tpc_count); +} + +u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count, + u32 max_tpc) +{ + u32 size; + + size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) * + gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc; + + size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) * + gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc; + + size = ALIGN(size, 128); + + return size; +} + diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h index 8a314056b..8b5c609b8 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h @@ -42,4 +42,16 @@ int gv11b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count, bool gfxp_wfi_timeout_unit_usec); void gv11b_gr_init_commit_global_timeslice(struct gk20a *g); +u32 gv11b_gr_init_get_bundle_cb_default_size(struct gk20a *g); +u32 gv11b_gr_init_get_min_gpm_fifo_depth(struct gk20a *g); +u32 gv11b_gr_init_get_bundle_cb_token_limit(struct gk20a *g); +u32 gv11b_gr_init_get_attrib_cb_default_size(struct gk20a *g); +u32 gv11b_gr_init_get_alpha_cb_default_size(struct gk20a *g); +u32 gv11b_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g); +u32 gv11b_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g); +u32 gv11b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count); +u32 gv11b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count); +u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count, + u32 max_tpc); + #endif /* NVGPU_GR_INIT_GV11B_H */ diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c index 5b35d7fd6..a5b58ba80 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c @@ -89,3 +89,39 @@ void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g, tu104_gr_init_patch_rtv_cb(g, gr_ctx, addr, rtv_cb_size, gfxp_addr_size, patch); } + +u32 tu104_gr_init_get_bundle_cb_default_size(struct gk20a *g) +{ + return gr_scc_bundle_cb_size_div_256b__prod_v(); +} + +u32 tu104_gr_init_get_min_gpm_fifo_depth(struct gk20a *g) +{ + return gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); +} + +u32 tu104_gr_init_get_bundle_cb_token_limit(struct gk20a *g) +{ + return gr_pd_ab_dist_cfg2_token_limit_init_v(); +} + +u32 tu104_gr_init_get_attrib_cb_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); +} + +u32 tu104_gr_init_get_alpha_cb_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); +} + +u32 tu104_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); +} + +u32 tu104_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g) +{ + return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); +} + diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h index 3f7a76c6b..ba63fb5dc 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h @@ -34,4 +34,12 @@ void tu104_gr_init_commit_rtv_cb(struct gk20a *g, u64 addr, void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool patch); +u32 tu104_gr_init_get_bundle_cb_default_size(struct gk20a *g); +u32 tu104_gr_init_get_min_gpm_fifo_depth(struct gk20a *g); +u32 tu104_gr_init_get_bundle_cb_token_limit(struct gk20a *g); +u32 tu104_gr_init_get_attrib_cb_default_size(struct gk20a *g); +u32 tu104_gr_init_get_alpha_cb_default_size(struct gk20a *g); +u32 tu104_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g); +u32 tu104_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g); + #endif /* NVGPU_GR_INIT_TU104_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 491a963aa..d9a5aba29 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -256,9 +256,6 @@ struct gpu_ops { struct { u32 (*get_patch_slots)(struct gk20a *g); void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset); - void (*bundle_cb_defaults)(struct gk20a *g); - void (*cb_size_default)(struct gk20a *g); - u32 (*calc_global_ctx_buffer_size)(struct gk20a *g); void (*commit_global_attrib_cb)(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, u64 addr, bool patch); @@ -699,6 +696,19 @@ struct gpu_ops { struct nvgpu_gr_ctx *gr_ctx, bool patch); void (*commit_gfxp_rtv_cb)(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool patch); + u32 (*get_bundle_cb_default_size)(struct gk20a *g); + u32 (*get_min_gpm_fifo_depth)(struct gk20a *g); + u32 (*get_bundle_cb_token_limit)(struct gk20a *g); + u32 (*get_attrib_cb_default_size)(struct gk20a *g); + u32 (*get_alpha_cb_default_size)(struct gk20a *g); + u32 (*get_attrib_cb_gfxp_default_size)(struct gk20a *g); + u32 (*get_attrib_cb_gfxp_size)(struct gk20a *g); + u32 (*get_attrib_cb_size)(struct gk20a *g, + u32 tpc_count); + u32 (*get_alpha_cb_size)(struct gk20a *g, + u32 tpc_count); + u32 (*get_global_attr_cb_size)(struct gk20a *g, + u32 tpc_count, u32 max_tpc); } init; u32 (*get_ctxsw_checksum_mismatch_mailbox_val)(void); diff --git a/drivers/gpu/nvgpu/os/linux/debug_gr.c b/drivers/gpu/nvgpu/os/linux/debug_gr.c index d54c6d631..532cdd0af 100644 --- a/drivers/gpu/nvgpu/os/linux/debug_gr.c +++ b/drivers/gpu/nvgpu/os/linux/debug_gr.c @@ -17,14 +17,39 @@ #include -int gr_gk20a_debugfs_init(struct gk20a *g) +static int gr_default_attrib_cb_size_show(struct seq_file *s, void *data) { - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a *g = s->private; - l->debugfs_gr_default_attrib_cb_size = - debugfs_create_u32("gr_default_attrib_cb_size", - S_IRUGO|S_IWUSR, l->debugfs, - &g->gr.attrib_cb_default_size); + seq_printf(s, "%u\n", g->ops.gr.init.get_attrib_cb_default_size(g)); + + return 0; +} + +static int gr_default_attrib_cb_size_open(struct inode *inode, + struct file *file) +{ + return single_open(file, gr_default_attrib_cb_size_show, + inode->i_private); +} + +static const struct file_operations gr_default_attrib_cb_size_fops= { + .open = gr_default_attrib_cb_size_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int gr_gk20a_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *d; + + d = debugfs_create_file( + "gr_default_attrib_cb_size", S_IRUGO, l->debugfs, g, + &gr_default_attrib_cb_size_fops); + if (!d) + return -ENOMEM; return 0; } diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h index 6c8b974aa..8ef81c8d7 100644 --- a/drivers/gpu/nvgpu/os/linux/os_linux.h +++ b/drivers/gpu/nvgpu/os/linux/os_linux.h @@ -146,7 +146,6 @@ struct nvgpu_os_linux { struct dentry *debugfs_timeouts_enabled; struct dentry *debugfs_gr_idle_timeout_default; struct dentry *debugfs_disable_bigpage; - struct dentry *debugfs_gr_default_attrib_cb_size; struct dentry *debugfs_timeslice_low_priority_us; struct dentry *debugfs_timeslice_medium_priority_us; diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c index 3affad157..b6191cef3 100644 --- a/drivers/gpu/nvgpu/tu104/gr_tu104.c +++ b/drivers/gpu/nvgpu/tu104/gr_tu104.c @@ -143,34 +143,6 @@ int gr_tu104_init_gfxp_rtv_cb(struct gk20a *g, return 0; } -void gr_tu104_bundle_cb_defaults(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - - gr->bundle_cb_default_size = - gr_scc_bundle_cb_size_div_256b__prod_v(); - gr->min_gpm_fifo_depth = - gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); - gr->bundle_cb_token_limit = - gr_pd_ab_dist_cfg2_token_limit_init_v(); -} - -void gr_tu104_cb_size_default(struct gk20a *g) -{ - struct gr_gk20a *gr = &g->gr; - - if (gr->attrib_cb_default_size == 0U) { - gr->attrib_cb_default_size = - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); - } - gr->alpha_cb_default_size = - gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); - gr->attrib_cb_gfxp_default_size = - gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); - gr->attrib_cb_gfxp_size = - gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); -} - void gr_tu104_enable_gpc_exceptions(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.h b/drivers/gpu/nvgpu/tu104/gr_tu104.h index 2c42f6d83..0fde26423 100644 --- a/drivers/gpu/nvgpu/tu104/gr_tu104.h +++ b/drivers/gpu/nvgpu/tu104/gr_tu104.h @@ -63,9 +63,6 @@ int gr_tu104_init_sw_bundle64(struct gk20a *g); void gr_tu10x_create_sysfs(struct gk20a *g); void gr_tu10x_remove_sysfs(struct gk20a *g); -void gr_tu104_bundle_cb_defaults(struct gk20a *g); -void gr_tu104_cb_size_default(struct gk20a *g); - void gr_tu104_enable_gpc_exceptions(struct gk20a *g); int gr_tu104_get_offset_in_gpccs_segment(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 873f30b97..7506241d4 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -382,10 +382,6 @@ static const struct gpu_ops tu104_ops = { .gr = { .get_patch_slots = gr_gv100_get_patch_slots, .init_gpc_mmu = gr_gv11b_init_gpc_mmu, - .bundle_cb_defaults = gr_tu104_bundle_cb_defaults, - .cb_size_default = gr_tu104_cb_size_default, - .calc_global_ctx_buffer_size = - gr_gv11b_calc_global_ctx_buffer_size, .commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb, .commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb, .commit_global_cb_manager = gr_gp10b_commit_global_cb_manager, @@ -684,6 +680,26 @@ static const struct gpu_ops tu104_ops = { .get_rtv_cb_size = tu104_gr_init_get_rtv_cb_size, .commit_rtv_cb = tu104_gr_init_commit_rtv_cb, .commit_gfxp_rtv_cb = tu104_gr_init_commit_gfxp_rtv_cb, + .get_bundle_cb_default_size = + tu104_gr_init_get_bundle_cb_default_size, + .get_min_gpm_fifo_depth = + tu104_gr_init_get_min_gpm_fifo_depth, + .get_bundle_cb_token_limit = + tu104_gr_init_get_bundle_cb_token_limit, + .get_attrib_cb_default_size = + tu104_gr_init_get_attrib_cb_default_size, + .get_alpha_cb_default_size = + tu104_gr_init_get_alpha_cb_default_size, + .get_attrib_cb_gfxp_default_size = + tu104_gr_init_get_attrib_cb_gfxp_default_size, + .get_attrib_cb_gfxp_size = + tu104_gr_init_get_attrib_cb_gfxp_size, + .get_attrib_cb_size = + gv11b_gr_init_get_attrib_cb_size, + .get_alpha_cb_size = + gv11b_gr_init_get_alpha_cb_size, + .get_global_attr_cb_size = + gv11b_gr_init_get_global_attr_cb_size, }, }, .fb = {