mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
gpu: nvgpu: add hal.gr.init hals to get global cb sizes
Remove the following variables from struct gr_gk20a:
  u32 bundle_cb_default_size;
  u32 min_gpm_fifo_depth;
  u32 bundle_cb_token_limit;
  u32 attrib_cb_default_size;
  u32 alpha_cb_default_size;
  u32 attrib_cb_gfxp_default_size;
  u32 attrib_cb_gfxp_size;
  u32 attrib_cb_size;
  u32 alpha_cb_size;

Instead, add the following HALs to the hal.gr.init unit to return each of the above sizes:
  u32 (*get_bundle_cb_default_size)(struct gk20a *g);
  u32 (*get_min_gpm_fifo_depth)(struct gk20a *g);
  u32 (*get_bundle_cb_token_limit)(struct gk20a *g);
  u32 (*get_attrib_cb_default_size)(struct gk20a *g);
  u32 (*get_alpha_cb_default_size)(struct gk20a *g);
  u32 (*get_attrib_cb_gfxp_default_size)(struct gk20a *g);
  u32 (*get_attrib_cb_gfxp_size)(struct gk20a *g);
  u32 (*get_attrib_cb_size)(struct gk20a *g, u32 tpc_count);
  u32 (*get_alpha_cb_size)(struct gk20a *g, u32 tpc_count);
  u32 (*get_global_attr_cb_size)(struct gk20a *g, u32 tpc_count, u32 max_tpc);

Define these HALs for all gm20b/gp10b/gv11b/gv100/tu104 chips.
Also add hal.gr.init support for the gv100 chip.

Remove all accesses to these variables in struct gr_gk20a and use the newly defined HALs instead.

Remove the following HALs, which initialized the sizes and are no longer required:
  g->ops.gr.bundle_cb_defaults(g);
  g->ops.gr.cb_size_default(g);
  g->ops.gr.calc_global_ctx_buffer_size(g);
Also remove the definitions of the above HALs from all the chip files.

Jira NVGPU-2961

Change-Id: I130b578ababf22328d68fe19df581e46aebeccc9
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2077214
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
7abb0324b4
commit
4c8aadf83c
@@ -168,6 +168,7 @@ nvgpu-y += \
|
|||||||
hal/gr/fecs_trace/fecs_trace_gv11b.o \
|
hal/gr/fecs_trace/fecs_trace_gv11b.o \
|
||||||
hal/gr/init/gr_init_gm20b.o \
|
hal/gr/init/gr_init_gm20b.o \
|
||||||
hal/gr/init/gr_init_gp10b.o \
|
hal/gr/init/gr_init_gp10b.o \
|
||||||
|
hal/gr/init/gr_init_gv100.o \
|
||||||
hal/gr/init/gr_init_gv11b.o \
|
hal/gr/init/gr_init_gv11b.o \
|
||||||
hal/gr/init/gr_init_tu104.o \
|
hal/gr/init/gr_init_tu104.o \
|
||||||
hal/gr/hwpm_map/hwpm_map_gv100.o \
|
hal/gr/hwpm_map/hwpm_map_gv100.o \
|
||||||
|
|||||||
@@ -322,6 +322,7 @@ srcs += common/sim.c \
|
|||||||
hal/gr/fecs_trace/fecs_trace_gv11b.c \
|
hal/gr/fecs_trace/fecs_trace_gv11b.c \
|
||||||
hal/gr/init/gr_init_gm20b.c \
|
hal/gr/init/gr_init_gm20b.c \
|
||||||
hal/gr/init/gr_init_gp10b.c \
|
hal/gr/init/gr_init_gp10b.c \
|
||||||
|
hal/gr/init/gr_init_gv100.c \
|
||||||
hal/gr/init/gr_init_gv11b.c \
|
hal/gr/init/gr_init_gv11b.c \
|
||||||
hal/gr/init/gr_init_tu104.c \
|
hal/gr/init/gr_init_tu104.c \
|
||||||
hal/gr/hwpm_map/hwpm_map_gv100.c \
|
hal/gr/hwpm_map/hwpm_map_gv100.c \
|
||||||
|
|||||||
@@ -33,6 +33,8 @@
|
|||||||
#include "hal/fb/fb_gm20b.h"
|
#include "hal/fb/fb_gm20b.h"
|
||||||
#include "hal/fb/fb_gp10b.h"
|
#include "hal/fb/fb_gp10b.h"
|
||||||
#include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
|
#include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
|
||||||
|
#include "hal/gr/init/gr_init_gm20b.h"
|
||||||
|
#include "hal/gr/init/gr_init_gp10b.h"
|
||||||
|
|
||||||
#include "common/netlist/netlist_gp10b.h"
|
#include "common/netlist/netlist_gp10b.h"
|
||||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
||||||
@@ -112,10 +114,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
|
|||||||
.gr = {
|
.gr = {
|
||||||
.get_patch_slots = gr_gk20a_get_patch_slots,
|
.get_patch_slots = gr_gk20a_get_patch_slots,
|
||||||
.init_gpc_mmu = NULL,
|
.init_gpc_mmu = NULL,
|
||||||
.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
|
|
||||||
.cb_size_default = gr_gp10b_cb_size_default,
|
|
||||||
.calc_global_ctx_buffer_size =
|
|
||||||
gr_gp10b_calc_global_ctx_buffer_size,
|
|
||||||
.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
|
.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
|
||||||
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
||||||
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
||||||
@@ -326,7 +324,27 @@ static const struct gpu_ops vgpu_gp10b_ops = {
|
|||||||
#endif /* CONFIG_GK20A_CTXSW_TRACE */
|
#endif /* CONFIG_GK20A_CTXSW_TRACE */
|
||||||
.init = {
|
.init = {
|
||||||
.fs_state = vgpu_gr_init_fs_state,
|
.fs_state = vgpu_gr_init_fs_state,
|
||||||
}
|
.get_bundle_cb_default_size =
|
||||||
|
gm20b_gr_init_get_bundle_cb_default_size,
|
||||||
|
.get_min_gpm_fifo_depth =
|
||||||
|
gm20b_gr_init_get_min_gpm_fifo_depth,
|
||||||
|
.get_bundle_cb_token_limit =
|
||||||
|
gm20b_gr_init_get_bundle_cb_token_limit,
|
||||||
|
.get_attrib_cb_default_size =
|
||||||
|
gp10b_gr_init_get_attrib_cb_default_size,
|
||||||
|
.get_alpha_cb_default_size =
|
||||||
|
gp10b_gr_init_get_alpha_cb_default_size,
|
||||||
|
.get_attrib_cb_gfxp_default_size =
|
||||||
|
gp10b_gr_init_get_attrib_cb_gfxp_default_size,
|
||||||
|
.get_attrib_cb_gfxp_size =
|
||||||
|
gp10b_gr_init_get_attrib_cb_gfxp_size,
|
||||||
|
.get_attrib_cb_size =
|
||||||
|
gp10b_gr_init_get_attrib_cb_size,
|
||||||
|
.get_alpha_cb_size =
|
||||||
|
gp10b_gr_init_get_alpha_cb_size,
|
||||||
|
.get_global_attr_cb_size =
|
||||||
|
gp10b_gr_init_get_global_attr_cb_size,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
.perf = {
|
.perf = {
|
||||||
.get_pmm_per_chiplet_offset =
|
.get_pmm_per_chiplet_offset =
|
||||||
|
|||||||
@@ -165,7 +165,9 @@ int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g)
|
|||||||
nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer,
|
nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer,
|
||||||
NVGPU_GR_GLOBAL_CTX_PAGEPOOL, size);
|
NVGPU_GR_GLOBAL_CTX_PAGEPOOL, size);
|
||||||
|
|
||||||
size = g->ops.gr.calc_global_ctx_buffer_size(g);
|
size = g->ops.gr.init.get_global_attr_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(g->gr.config),
|
||||||
|
nvgpu_gr_config_get_max_tpc_count(g->gr.config));
|
||||||
nvgpu_log_info(g, "attr_buffer_size : %u", size);
|
nvgpu_log_info(g, "attr_buffer_size : %u", size);
|
||||||
|
|
||||||
nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer,
|
nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer,
|
||||||
@@ -416,9 +418,6 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
g->ops.gr.bundle_cb_defaults(g);
|
|
||||||
g->ops.gr.cb_size_default(g);
|
|
||||||
g->ops.gr.calc_global_ctx_buffer_size(g);
|
|
||||||
err = g->ops.gr.init.fs_state(g);
|
err = g->ops.gr.init.fs_state(g);
|
||||||
if (err) {
|
if (err) {
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
|||||||
@@ -40,6 +40,7 @@
|
|||||||
#include "hal/fb/fb_gm20b.h"
|
#include "hal/fb/fb_gm20b.h"
|
||||||
#include "hal/fb/fb_gp10b.h"
|
#include "hal/fb/fb_gp10b.h"
|
||||||
#include "hal/fb/fb_gv11b.h"
|
#include "hal/fb/fb_gv11b.h"
|
||||||
|
#include "hal/gr/init/gr_init_gv11b.h"
|
||||||
|
|
||||||
#include "common/netlist/netlist_gv11b.h"
|
#include "common/netlist/netlist_gv11b.h"
|
||||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
||||||
@@ -131,10 +132,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
|
|||||||
},
|
},
|
||||||
.gr = {
|
.gr = {
|
||||||
.init_gpc_mmu = NULL,
|
.init_gpc_mmu = NULL,
|
||||||
.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults,
|
|
||||||
.cb_size_default = gr_gv11b_cb_size_default,
|
|
||||||
.calc_global_ctx_buffer_size =
|
|
||||||
gr_gv11b_calc_global_ctx_buffer_size,
|
|
||||||
.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
|
.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
|
||||||
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
||||||
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
||||||
@@ -375,7 +372,27 @@ static const struct gpu_ops vgpu_gv11b_ops = {
|
|||||||
#endif /* CONFIG_GK20A_CTXSW_TRACE */
|
#endif /* CONFIG_GK20A_CTXSW_TRACE */
|
||||||
.init = {
|
.init = {
|
||||||
.fs_state = vgpu_gr_init_fs_state,
|
.fs_state = vgpu_gr_init_fs_state,
|
||||||
}
|
.get_bundle_cb_default_size =
|
||||||
|
gv11b_gr_init_get_bundle_cb_default_size,
|
||||||
|
.get_min_gpm_fifo_depth =
|
||||||
|
gv11b_gr_init_get_min_gpm_fifo_depth,
|
||||||
|
.get_bundle_cb_token_limit =
|
||||||
|
gv11b_gr_init_get_bundle_cb_token_limit,
|
||||||
|
.get_attrib_cb_default_size =
|
||||||
|
gv11b_gr_init_get_attrib_cb_default_size,
|
||||||
|
.get_alpha_cb_default_size =
|
||||||
|
gv11b_gr_init_get_alpha_cb_default_size,
|
||||||
|
.get_attrib_cb_gfxp_default_size =
|
||||||
|
gv11b_gr_init_get_attrib_cb_gfxp_default_size,
|
||||||
|
.get_attrib_cb_gfxp_size =
|
||||||
|
gv11b_gr_init_get_attrib_cb_gfxp_size,
|
||||||
|
.get_attrib_cb_size =
|
||||||
|
gv11b_gr_init_get_attrib_cb_size,
|
||||||
|
.get_alpha_cb_size =
|
||||||
|
gv11b_gr_init_get_alpha_cb_size,
|
||||||
|
.get_global_attr_cb_size =
|
||||||
|
gv11b_gr_init_get_global_attr_cb_size,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
.perf = {
|
.perf = {
|
||||||
.get_pmm_per_chiplet_offset =
|
.get_pmm_per_chiplet_offset =
|
||||||
|
|||||||
@@ -709,7 +709,7 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
|
|||||||
NVGPU_GR_CTX_CIRCULAR_VA) >>
|
NVGPU_GR_CTX_CIRCULAR_VA) >>
|
||||||
U64(gr_scc_bundle_cb_base_addr_39_8_align_bits_v());
|
U64(gr_scc_bundle_cb_base_addr_39_8_align_bits_v());
|
||||||
|
|
||||||
size = gr->bundle_cb_default_size;
|
size = g->ops.gr.init.get_bundle_cb_default_size(g);
|
||||||
|
|
||||||
nvgpu_log_info(g, "bundle cb addr : 0x%016llx, size : %d",
|
nvgpu_log_info(g, "bundle cb addr : 0x%016llx, size : %d",
|
||||||
addr, size);
|
addr, size);
|
||||||
@@ -1676,7 +1676,9 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
|
|||||||
nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer,
|
nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer,
|
||||||
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR, size);
|
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR, size);
|
||||||
|
|
||||||
size = g->ops.gr.calc_global_ctx_buffer_size(g);
|
size = g->ops.gr.init.get_global_attr_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(g->gr.config),
|
||||||
|
nvgpu_gr_config_get_max_tpc_count(g->gr.config));
|
||||||
nvgpu_log_info(g, "attr_buffer_size : %u", size);
|
nvgpu_log_info(g, "attr_buffer_size : %u", size);
|
||||||
|
|
||||||
nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer,
|
nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer,
|
||||||
@@ -1993,19 +1995,22 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
|
|||||||
nvgpu_log_info(g, "fbps: %d", gr->num_fbps);
|
nvgpu_log_info(g, "fbps: %d", gr->num_fbps);
|
||||||
nvgpu_log_info(g, "max_fbps_count: %d", gr->max_fbps_count);
|
nvgpu_log_info(g, "max_fbps_count: %d", gr->max_fbps_count);
|
||||||
|
|
||||||
g->ops.gr.bundle_cb_defaults(g);
|
|
||||||
g->ops.gr.cb_size_default(g);
|
|
||||||
g->ops.gr.calc_global_ctx_buffer_size(g);
|
|
||||||
|
|
||||||
nvgpu_log_info(g, "bundle_cb_default_size: %d",
|
nvgpu_log_info(g, "bundle_cb_default_size: %d",
|
||||||
gr->bundle_cb_default_size);
|
g->ops.gr.init.get_bundle_cb_default_size(g));
|
||||||
nvgpu_log_info(g, "min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth);
|
nvgpu_log_info(g, "min_gpm_fifo_depth: %d",
|
||||||
nvgpu_log_info(g, "bundle_cb_token_limit: %d", gr->bundle_cb_token_limit);
|
g->ops.gr.init.get_min_gpm_fifo_depth(g));
|
||||||
|
nvgpu_log_info(g, "bundle_cb_token_limit: %d",
|
||||||
|
g->ops.gr.init.get_bundle_cb_token_limit(g));
|
||||||
nvgpu_log_info(g, "attrib_cb_default_size: %d",
|
nvgpu_log_info(g, "attrib_cb_default_size: %d",
|
||||||
gr->attrib_cb_default_size);
|
g->ops.gr.init.get_attrib_cb_default_size(g));
|
||||||
nvgpu_log_info(g, "attrib_cb_size: %d", gr->attrib_cb_size);
|
nvgpu_log_info(g, "attrib_cb_size: %d",
|
||||||
nvgpu_log_info(g, "alpha_cb_default_size: %d", gr->alpha_cb_default_size);
|
g->ops.gr.init.get_attrib_cb_size(g,
|
||||||
nvgpu_log_info(g, "alpha_cb_size: %d", gr->alpha_cb_size);
|
nvgpu_gr_config_get_tpc_count(gr->config)));
|
||||||
|
nvgpu_log_info(g, "alpha_cb_default_size: %d",
|
||||||
|
g->ops.gr.init.get_alpha_cb_default_size(g));
|
||||||
|
nvgpu_log_info(g, "alpha_cb_size: %d",
|
||||||
|
g->ops.gr.init.get_alpha_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config)));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@@ -6267,9 +6272,7 @@ u32 gr_gk20a_gpccs_falcon_base_addr(void)
|
|||||||
|
|
||||||
u32 gk20a_gr_get_global_ctx_cb_buffer_size(struct gk20a *g)
|
u32 gk20a_gr_get_global_ctx_cb_buffer_size(struct gk20a *g)
|
||||||
{
|
{
|
||||||
struct gr_gk20a *gr = &g->gr;
|
return g->ops.gr.init.get_bundle_cb_default_size(g) *
|
||||||
|
|
||||||
return gr->bundle_cb_default_size *
|
|
||||||
gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
|
gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -201,16 +201,6 @@ struct gr_gk20a {
|
|||||||
u32 num_fbps;
|
u32 num_fbps;
|
||||||
u32 max_fbps_count;
|
u32 max_fbps_count;
|
||||||
|
|
||||||
u32 bundle_cb_default_size;
|
|
||||||
u32 min_gpm_fifo_depth;
|
|
||||||
u32 bundle_cb_token_limit;
|
|
||||||
u32 attrib_cb_default_size;
|
|
||||||
u32 attrib_cb_size;
|
|
||||||
u32 attrib_cb_gfxp_default_size;
|
|
||||||
u32 attrib_cb_gfxp_size;
|
|
||||||
u32 alpha_cb_default_size;
|
|
||||||
u32 alpha_cb_size;
|
|
||||||
|
|
||||||
u32 gfxp_wfi_timeout_count;
|
u32 gfxp_wfi_timeout_count;
|
||||||
bool gfxp_wfi_timeout_unit_usec;
|
bool gfxp_wfi_timeout_unit_usec;
|
||||||
|
|
||||||
|
|||||||
@@ -76,51 +76,6 @@ void gr_gm20b_init_gpc_mmu(struct gk20a *g)
|
|||||||
gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(), g->ltc_count);
|
gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(), g->ltc_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
void gr_gm20b_bundle_cb_defaults(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
|
|
||||||
gr->bundle_cb_default_size =
|
|
||||||
gr_scc_bundle_cb_size_div_256b__prod_v();
|
|
||||||
gr->min_gpm_fifo_depth =
|
|
||||||
gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
|
|
||||||
gr->bundle_cb_token_limit =
|
|
||||||
gr_pd_ab_dist_cfg2_token_limit_init_v();
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_gm20b_cb_size_default(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
|
|
||||||
if (gr->attrib_cb_default_size == 0U) {
|
|
||||||
gr->attrib_cb_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
|
|
||||||
}
|
|
||||||
gr->alpha_cb_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
u32 size;
|
|
||||||
|
|
||||||
gr->attrib_cb_size = gr->attrib_cb_default_size
|
|
||||||
+ (gr->attrib_cb_default_size >> 1);
|
|
||||||
gr->alpha_cb_size = gr->alpha_cb_default_size
|
|
||||||
+ (gr->alpha_cb_default_size >> 1);
|
|
||||||
|
|
||||||
size = gr->attrib_cb_size *
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
|
|
||||||
nvgpu_gr_config_get_max_tpc_count(gr->config);
|
|
||||||
|
|
||||||
size += gr->alpha_cb_size *
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
|
|
||||||
nvgpu_gr_config_get_max_tpc_count(gr->config);
|
|
||||||
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
|
void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
|
||||||
struct nvgpu_gr_ctx *ch_ctx,
|
struct nvgpu_gr_ctx *ch_ctx,
|
||||||
u64 addr, bool patch)
|
u64 addr, bool patch)
|
||||||
@@ -143,6 +98,7 @@ void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
|
|||||||
u64 addr, u64 size, bool patch)
|
u64 addr, u64 size, bool patch)
|
||||||
{
|
{
|
||||||
u32 data;
|
u32 data;
|
||||||
|
u32 bundle_cb_token_limit = g->ops.gr.init.get_bundle_cb_token_limit(g);
|
||||||
|
|
||||||
nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
|
nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
|
||||||
gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
|
gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
|
||||||
@@ -159,17 +115,17 @@ void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
|
|||||||
gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
|
gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
|
||||||
|
|
||||||
/* data for state_limit */
|
/* data for state_limit */
|
||||||
data = (g->gr.bundle_cb_default_size *
|
data = (g->ops.gr.init.get_bundle_cb_default_size(g) *
|
||||||
gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
|
gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
|
||||||
gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
|
gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
|
||||||
|
|
||||||
data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
|
data = min_t(u32, data, g->ops.gr.init.get_min_gpm_fifo_depth(g));
|
||||||
|
|
||||||
nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d",
|
nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d",
|
||||||
g->gr.bundle_cb_token_limit, data);
|
bundle_cb_token_limit, data);
|
||||||
|
|
||||||
nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
|
nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
|
||||||
gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
|
gr_pd_ab_dist_cfg2_token_limit_f(bundle_cb_token_limit) |
|
||||||
gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
|
gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -183,6 +139,12 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
u32 pd_ab_max_output;
|
u32 pd_ab_max_output;
|
||||||
u32 gpc_index, ppc_index;
|
u32 gpc_index, ppc_index;
|
||||||
u32 cbm_cfg_size1, cbm_cfg_size2;
|
u32 cbm_cfg_size1, cbm_cfg_size2;
|
||||||
|
u32 attrib_cb_default_size = g->ops.gr.init.get_attrib_cb_default_size(g);
|
||||||
|
u32 alpha_cb_default_size = g->ops.gr.init.get_alpha_cb_default_size(g);
|
||||||
|
u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config));
|
||||||
|
u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config));
|
||||||
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
||||||
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
||||||
u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
|
u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
|
||||||
@@ -191,11 +153,11 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
nvgpu_log_fn(g, " ");
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_r(),
|
nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_r(),
|
||||||
gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
|
gr_ds_tga_constraintlogic_beta_cbsize_f(attrib_cb_default_size) |
|
||||||
gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
|
gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_default_size),
|
||||||
patch);
|
patch);
|
||||||
|
|
||||||
pd_ab_max_output = (gr->alpha_cb_default_size *
|
pd_ab_max_output = (alpha_cb_default_size *
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
|
||||||
gr_pd_ab_dist_cfg1_max_output_granularity_v();
|
gr_pd_ab_dist_cfg1_max_output_granularity_v();
|
||||||
|
|
||||||
@@ -204,7 +166,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
|
gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
|
||||||
|
|
||||||
alpha_offset_in_chunk = attrib_offset_in_chunk +
|
alpha_offset_in_chunk = attrib_offset_in_chunk +
|
||||||
nvgpu_gr_config_get_tpc_count(gr->config) * gr->attrib_cb_size;
|
nvgpu_gr_config_get_tpc_count(gr->config) * attrib_cb_size;
|
||||||
|
|
||||||
for (gpc_index = 0;
|
for (gpc_index = 0;
|
||||||
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
|
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
|
||||||
@@ -215,10 +177,10 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config,
|
ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config,
|
||||||
gpc_index);
|
gpc_index);
|
||||||
ppc_index++) {
|
ppc_index++) {
|
||||||
cbm_cfg_size1 = gr->attrib_cb_default_size *
|
cbm_cfg_size1 = attrib_cb_default_size *
|
||||||
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
||||||
gpc_index, ppc_index);
|
gpc_index, ppc_index);
|
||||||
cbm_cfg_size2 = gr->alpha_cb_default_size *
|
cbm_cfg_size2 = alpha_cb_default_size *
|
||||||
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
||||||
gpc_index, ppc_index);
|
gpc_index, ppc_index);
|
||||||
|
|
||||||
@@ -232,7 +194,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
ppc_in_gpc_stride * ppc_index,
|
ppc_in_gpc_stride * ppc_index,
|
||||||
attrib_offset_in_chunk, patch);
|
attrib_offset_in_chunk, patch);
|
||||||
|
|
||||||
attrib_offset_in_chunk += gr->attrib_cb_size *
|
attrib_offset_in_chunk += attrib_cb_size *
|
||||||
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
||||||
gpc_index, ppc_index);
|
gpc_index, ppc_index);
|
||||||
|
|
||||||
@@ -246,7 +208,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
ppc_in_gpc_stride * ppc_index,
|
ppc_in_gpc_stride * ppc_index,
|
||||||
alpha_offset_in_chunk, patch);
|
alpha_offset_in_chunk, patch);
|
||||||
|
|
||||||
alpha_offset_in_chunk += gr->alpha_cb_size *
|
alpha_offset_in_chunk += alpha_cb_size *
|
||||||
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
||||||
gpc_index, ppc_index);
|
gpc_index, ppc_index);
|
||||||
|
|
||||||
@@ -335,6 +297,8 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
|
|||||||
u32 gpc_index, ppc_index, stride, val;
|
u32 gpc_index, ppc_index, stride, val;
|
||||||
u32 pd_ab_max_output;
|
u32 pd_ab_max_output;
|
||||||
u32 alpha_cb_size = data * 4U;
|
u32 alpha_cb_size = data * 4U;
|
||||||
|
u32 alpha_cb_size_max = g->ops.gr.init.get_alpha_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config));
|
||||||
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
||||||
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
||||||
|
|
||||||
@@ -342,8 +306,8 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
|
|||||||
/* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
|
/* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
|
||||||
return; */
|
return; */
|
||||||
|
|
||||||
if (alpha_cb_size > gr->alpha_cb_size) {
|
if (alpha_cb_size > alpha_cb_size_max) {
|
||||||
alpha_cb_size = gr->alpha_cb_size;
|
alpha_cb_size = alpha_cb_size_max;
|
||||||
}
|
}
|
||||||
|
|
||||||
gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
|
gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
|
||||||
@@ -391,11 +355,13 @@ void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
|
|||||||
u32 cb_size = data * 4U;
|
u32 cb_size = data * 4U;
|
||||||
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
||||||
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
||||||
|
u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config));
|
||||||
|
|
||||||
nvgpu_log_fn(g, " ");
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
if (cb_size > gr->attrib_cb_size) {
|
if (cb_size > attrib_cb_size) {
|
||||||
cb_size = gr->attrib_cb_size;
|
cb_size = attrib_cb_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
|
gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
|
||||||
|
|||||||
@@ -50,9 +50,6 @@ int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask);
|
|||||||
void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data);
|
void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data);
|
||||||
void gm20a_gr_disable_rd_coalesce(struct gk20a *g);
|
void gm20a_gr_disable_rd_coalesce(struct gk20a *g);
|
||||||
void gr_gm20b_init_gpc_mmu(struct gk20a *g);
|
void gr_gm20b_init_gpc_mmu(struct gk20a *g);
|
||||||
void gr_gm20b_bundle_cb_defaults(struct gk20a *g);
|
|
||||||
void gr_gm20b_cb_size_default(struct gk20a *g);
|
|
||||||
u32 gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g);
|
|
||||||
void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
|
void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
|
||||||
struct nvgpu_gr_ctx *ch_ctx,
|
struct nvgpu_gr_ctx *ch_ctx,
|
||||||
u64 addr, u64 size, bool patch);
|
u64 addr, u64 size, bool patch);
|
||||||
|
|||||||
@@ -231,10 +231,6 @@ static const struct gpu_ops gm20b_ops = {
|
|||||||
.gr = {
|
.gr = {
|
||||||
.get_patch_slots = gr_gk20a_get_patch_slots,
|
.get_patch_slots = gr_gk20a_get_patch_slots,
|
||||||
.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
|
.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
|
||||||
.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
|
|
||||||
.cb_size_default = gr_gm20b_cb_size_default,
|
|
||||||
.calc_global_ctx_buffer_size =
|
|
||||||
gr_gm20b_calc_global_ctx_buffer_size,
|
|
||||||
.commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb,
|
.commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb,
|
||||||
.commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb,
|
.commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb,
|
||||||
.commit_global_cb_manager = gr_gm20b_commit_global_cb_manager,
|
.commit_global_cb_manager = gr_gm20b_commit_global_cb_manager,
|
||||||
@@ -441,6 +437,22 @@ static const struct gpu_ops gm20b_ops = {
|
|||||||
.load_method_init = gm20b_gr_init_load_method_init,
|
.load_method_init = gm20b_gr_init_load_method_init,
|
||||||
.commit_global_timeslice =
|
.commit_global_timeslice =
|
||||||
gm20b_gr_init_commit_global_timeslice,
|
gm20b_gr_init_commit_global_timeslice,
|
||||||
|
.get_bundle_cb_default_size =
|
||||||
|
gm20b_gr_init_get_bundle_cb_default_size,
|
||||||
|
.get_min_gpm_fifo_depth =
|
||||||
|
gm20b_gr_init_get_min_gpm_fifo_depth,
|
||||||
|
.get_bundle_cb_token_limit =
|
||||||
|
gm20b_gr_init_get_bundle_cb_token_limit,
|
||||||
|
.get_attrib_cb_default_size =
|
||||||
|
gm20b_gr_init_get_attrib_cb_default_size,
|
||||||
|
.get_alpha_cb_default_size =
|
||||||
|
gm20b_gr_init_get_alpha_cb_default_size,
|
||||||
|
.get_attrib_cb_size =
|
||||||
|
gm20b_gr_init_get_attrib_cb_size,
|
||||||
|
.get_alpha_cb_size =
|
||||||
|
gm20b_gr_init_get_alpha_cb_size,
|
||||||
|
.get_global_attr_cb_size =
|
||||||
|
gm20b_gr_init_get_global_attr_cb_size,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
.fb = {
|
.fb = {
|
||||||
|
|||||||
@@ -127,21 +127,6 @@ fail:
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gr_gp106_cb_size_default(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
|
|
||||||
if (gr->attrib_cb_default_size == 0U) {
|
|
||||||
gr->attrib_cb_default_size = 0x800;
|
|
||||||
}
|
|
||||||
gr->alpha_cb_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
|
||||||
gr->attrib_cb_gfxp_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
|
||||||
gr->attrib_cb_gfxp_size =
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
|
||||||
}
|
|
||||||
|
|
||||||
int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
|
int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
|
||||||
struct nvgpu_gr_ctx *gr_ctx,
|
struct nvgpu_gr_ctx *gr_ctx,
|
||||||
struct vm_gk20a *vm, u32 class,
|
struct vm_gk20a *vm, u32 class,
|
||||||
@@ -178,10 +163,12 @@ int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
|
|||||||
gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
|
gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
|
||||||
u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
|
u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
|
||||||
gr_scc_pagepool_total_pages_byte_granularity_v();
|
gr_scc_pagepool_total_pages_byte_granularity_v();
|
||||||
u32 betacb_size = g->gr.attrib_cb_default_size +
|
u32 betacb_size = g->ops.gr.init.get_attrib_cb_default_size(g) +
|
||||||
(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
|
(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
|
||||||
u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
|
u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(g->gr.config));
|
||||||
|
u32 attrib_cb_size = (betacb_size + alpha_cb_size) *
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
|
||||||
nvgpu_gr_config_get_max_tpc_count(g->gr.config);
|
nvgpu_gr_config_get_max_tpc_count(g->gr.config);
|
||||||
attrib_cb_size = ALIGN(attrib_cb_size, 128);
|
attrib_cb_size = ALIGN(attrib_cb_size, 128);
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num);
|
|||||||
u32 gr_gp106_pagepool_default_size(struct gk20a *g);
|
u32 gr_gp106_pagepool_default_size(struct gk20a *g);
|
||||||
int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
|
int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
|
||||||
u32 class_num, u32 offset, u32 data);
|
u32 class_num, u32 offset, u32 data);
|
||||||
void gr_gp106_cb_size_default(struct gk20a *g);
|
|
||||||
int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
|
int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
|
||||||
struct nvgpu_gr_ctx *gr_ctx,
|
struct nvgpu_gr_ctx *gr_ctx,
|
||||||
struct vm_gk20a *vm, u32 class,
|
struct vm_gk20a *vm, u32 class,
|
||||||
|
|||||||
@@ -403,6 +403,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
u32 temp, temp2;
|
u32 temp, temp2;
|
||||||
u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate;
|
u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate;
|
||||||
u32 attrib_size_in_chunk, cb_attrib_cache_size_init;
|
u32 attrib_size_in_chunk, cb_attrib_cache_size_init;
|
||||||
|
u32 attrib_cb_default_size = g->ops.gr.init.get_attrib_cb_default_size(g);
|
||||||
|
u32 alpha_cb_default_size = g->ops.gr.init.get_alpha_cb_default_size(g);
|
||||||
|
u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config));
|
||||||
|
u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config));
|
||||||
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
||||||
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
||||||
u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
|
u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
|
||||||
@@ -410,19 +416,21 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
nvgpu_log_fn(g, " ");
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
|
if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
|
||||||
attrib_size_in_chunk = gr->attrib_cb_gfxp_size;
|
attrib_size_in_chunk =
|
||||||
cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size;
|
g->ops.gr.init.get_attrib_cb_gfxp_size(g);
|
||||||
|
cb_attrib_cache_size_init =
|
||||||
|
g->ops.gr.init.get_attrib_cb_gfxp_default_size(g);
|
||||||
} else {
|
} else {
|
||||||
attrib_size_in_chunk = gr->attrib_cb_size;
|
attrib_size_in_chunk = attrib_cb_size;
|
||||||
cb_attrib_cache_size_init = gr->attrib_cb_default_size;
|
cb_attrib_cache_size_init = attrib_cb_default_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(),
|
nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(),
|
||||||
gr->attrib_cb_default_size, patch);
|
attrib_cb_default_size, patch);
|
||||||
nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(),
|
nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(),
|
||||||
gr->alpha_cb_default_size, patch);
|
alpha_cb_default_size, patch);
|
||||||
|
|
||||||
pd_ab_max_output = (gr->alpha_cb_default_size *
|
pd_ab_max_output = (alpha_cb_default_size *
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
|
||||||
gr_pd_ab_dist_cfg1_max_output_granularity_v();
|
gr_pd_ab_dist_cfg1_max_output_granularity_v();
|
||||||
|
|
||||||
@@ -431,7 +439,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
|
gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
|
||||||
|
|
||||||
attrib_offset_in_chunk = alpha_offset_in_chunk +
|
attrib_offset_in_chunk = alpha_offset_in_chunk +
|
||||||
nvgpu_gr_config_get_tpc_count(gr->config) * gr->alpha_cb_size;
|
nvgpu_gr_config_get_tpc_count(gr->config) * alpha_cb_size;
|
||||||
|
|
||||||
for (gpc_index = 0;
|
for (gpc_index = 0;
|
||||||
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
|
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
|
||||||
@@ -444,10 +452,10 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
cbm_cfg_size_beta = cb_attrib_cache_size_init *
|
cbm_cfg_size_beta = cb_attrib_cache_size_init *
|
||||||
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
||||||
gpc_index, ppc_index);
|
gpc_index, ppc_index);
|
||||||
cbm_cfg_size_alpha = gr->alpha_cb_default_size *
|
cbm_cfg_size_alpha = alpha_cb_default_size *
|
||||||
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
||||||
gpc_index, ppc_index);
|
gpc_index, ppc_index);
|
||||||
cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
|
cbm_cfg_size_steadystate = attrib_cb_default_size *
|
||||||
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
||||||
gpc_index, ppc_index);
|
gpc_index, ppc_index);
|
||||||
|
|
||||||
@@ -481,7 +489,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
|
|||||||
ppc_in_gpc_stride * ppc_index,
|
ppc_in_gpc_stride * ppc_index,
|
||||||
alpha_offset_in_chunk, patch);
|
alpha_offset_in_chunk, patch);
|
||||||
|
|
||||||
alpha_offset_in_chunk += gr->alpha_cb_size *
|
alpha_offset_in_chunk += alpha_cb_size *
|
||||||
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
nvgpu_gr_config_get_pes_tpc_count(gr->config,
|
||||||
gpc_index, ppc_index);
|
gpc_index, ppc_index);
|
||||||
|
|
||||||
@@ -520,34 +528,6 @@ u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
|
|||||||
return gr_scc_pagepool_total_pages_hwmax_value_v();
|
return gr_scc_pagepool_total_pages_hwmax_value_v();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
u32 size;
|
|
||||||
|
|
||||||
gr->attrib_cb_size = gr->attrib_cb_default_size;
|
|
||||||
gr->alpha_cb_size = gr->alpha_cb_default_size;
|
|
||||||
|
|
||||||
gr->attrib_cb_size = min(gr->attrib_cb_size,
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) /
|
|
||||||
nvgpu_gr_config_get_tpc_count(gr->config));
|
|
||||||
gr->alpha_cb_size = min(gr->alpha_cb_size,
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) /
|
|
||||||
nvgpu_gr_config_get_tpc_count(gr->config));
|
|
||||||
|
|
||||||
size = gr->attrib_cb_size *
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
|
|
||||||
nvgpu_gr_config_get_max_tpc_count(gr->config);
|
|
||||||
|
|
||||||
size += gr->alpha_cb_size *
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
|
|
||||||
nvgpu_gr_config_get_max_tpc_count(gr->config);
|
|
||||||
|
|
||||||
size = ALIGN(size, 128);
|
|
||||||
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void gr_gp10b_set_go_idle_timeout(struct gk20a *g, u32 data)
|
static void gr_gp10b_set_go_idle_timeout(struct gk20a *g, u32 data)
|
||||||
{
|
{
|
||||||
gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
|
gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
|
||||||
@@ -670,38 +650,21 @@ fail:
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gr_gp10b_cb_size_default(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
|
|
||||||
if (gr->attrib_cb_default_size == 0U) {
|
|
||||||
gr->attrib_cb_default_size = 0x800;
|
|
||||||
}
|
|
||||||
gr->alpha_cb_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
|
||||||
gr->attrib_cb_gfxp_default_size =
|
|
||||||
gr->attrib_cb_default_size +
|
|
||||||
(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
|
|
||||||
gr->attrib_cb_gfxp_size =
|
|
||||||
gr->attrib_cb_default_size +
|
|
||||||
(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
|
void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
|
||||||
{
|
{
|
||||||
struct gr_gk20a *gr = &g->gr;
|
struct gr_gk20a *gr = &g->gr;
|
||||||
u32 gpc_index, ppc_index, stride, val;
|
u32 gpc_index, ppc_index, stride, val;
|
||||||
u32 pd_ab_max_output;
|
u32 pd_ab_max_output;
|
||||||
u32 alpha_cb_size = data * 4U;
|
u32 alpha_cb_size = data * 4U;
|
||||||
|
u32 alpha_cb_size_max = g->ops.gr.init.get_alpha_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config));
|
||||||
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
||||||
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
||||||
|
|
||||||
nvgpu_log_fn(g, " ");
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
if (alpha_cb_size > gr->alpha_cb_size) {
|
if (alpha_cb_size > alpha_cb_size_max) {
|
||||||
alpha_cb_size = gr->alpha_cb_size;
|
alpha_cb_size = alpha_cb_size_max;
|
||||||
}
|
}
|
||||||
|
|
||||||
gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(),
|
gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(),
|
||||||
@@ -749,11 +712,13 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
|
|||||||
u32 cb_size_steady = data * 4U, cb_size;
|
u32 cb_size_steady = data * 4U, cb_size;
|
||||||
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
||||||
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
||||||
|
u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config));
|
||||||
|
|
||||||
nvgpu_log_fn(g, " ");
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
if (cb_size_steady > gr->attrib_cb_size) {
|
if (cb_size_steady > attrib_cb_size) {
|
||||||
cb_size_steady = gr->attrib_cb_size;
|
cb_size_steady = attrib_cb_size;
|
||||||
}
|
}
|
||||||
if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) !=
|
if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) !=
|
||||||
gk20a_readl(g,
|
gk20a_readl(g,
|
||||||
@@ -1216,7 +1181,9 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
|
|||||||
if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) {
|
if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) {
|
||||||
attrBufferSize = U32(gr_ctx->betacb_ctxsw_buffer.size);
|
attrBufferSize = U32(gr_ctx->betacb_ctxsw_buffer.size);
|
||||||
} else {
|
} else {
|
||||||
attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g);
|
attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(g->gr.config),
|
||||||
|
nvgpu_gr_config_get_max_tpc_count(g->gr.config));
|
||||||
}
|
}
|
||||||
|
|
||||||
attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
|
attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
|
||||||
@@ -1241,6 +1208,7 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
|
|||||||
u64 addr, u64 size, bool patch)
|
u64 addr, u64 size, bool patch)
|
||||||
{
|
{
|
||||||
u32 data;
|
u32 data;
|
||||||
|
u32 bundle_cb_token_limit = g->ops.gr.init.get_bundle_cb_token_limit(g);
|
||||||
|
|
||||||
nvgpu_assert(u64_hi32(addr) == 0U);
|
nvgpu_assert(u64_hi32(addr) == 0U);
|
||||||
nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(),
|
nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(),
|
||||||
@@ -1259,17 +1227,17 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
|
|||||||
gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
|
gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
|
||||||
|
|
||||||
/* data for state_limit */
|
/* data for state_limit */
|
||||||
data = (g->gr.bundle_cb_default_size *
|
data = (g->ops.gr.init.get_bundle_cb_default_size(g) *
|
||||||
gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
|
gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
|
||||||
gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
|
gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
|
||||||
|
|
||||||
data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
|
data = min_t(u32, data, g->ops.gr.init.get_min_gpm_fifo_depth(g));
|
||||||
|
|
||||||
nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d",
|
nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d",
|
||||||
g->gr.bundle_cb_token_limit, data);
|
bundle_cb_token_limit, data);
|
||||||
|
|
||||||
nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(),
|
nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(),
|
||||||
gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
|
gr_pd_ab_dist_cfg2_token_limit_f(bundle_cb_token_limit) |
|
||||||
gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
|
gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1955,24 +1923,31 @@ unsigned long gr_gp10b_get_max_gfxp_wfi_timeout_count(struct gk20a *g)
|
|||||||
return (100UL * 1000UL * 1000UL);
|
return (100UL * 1000UL * 1000UL);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gp10b_gr_get_ctx_spill_size(struct gk20a *g) {
|
u32 gp10b_gr_get_ctx_spill_size(struct gk20a *g)
|
||||||
|
{
|
||||||
return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
|
return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
|
||||||
gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
|
gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gp10b_gr_get_ctx_pagepool_size(struct gk20a *g) {
|
u32 gp10b_gr_get_ctx_pagepool_size(struct gk20a *g)
|
||||||
|
{
|
||||||
return g->ops.gr.pagepool_default_size(g) *
|
return g->ops.gr.pagepool_default_size(g) *
|
||||||
gr_scc_pagepool_total_pages_byte_granularity_v();
|
gr_scc_pagepool_total_pages_byte_granularity_v();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gp10b_gr_get_ctx_betacb_size(struct gk20a *g) {
|
u32 gp10b_gr_get_ctx_betacb_size(struct gk20a *g)
|
||||||
return g->gr.attrib_cb_default_size +
|
{
|
||||||
|
return g->ops.gr.init.get_attrib_cb_default_size(g) +
|
||||||
(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
|
(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gp10b_gr_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size) {
|
u32 gp10b_gr_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size)
|
||||||
return (betacb_size + g->gr.alpha_cb_size) *
|
{
|
||||||
|
u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(g->gr.config));
|
||||||
|
|
||||||
|
return (betacb_size + alpha_cb_size) *
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
|
||||||
nvgpu_gr_config_get_max_tpc_count(g->gr.config);
|
nvgpu_gr_config_get_max_tpc_count(g->gr.config);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -76,12 +76,10 @@ void gr_gp10b_commit_global_pagepool(struct gk20a *g,
|
|||||||
struct nvgpu_gr_ctx *gr_ctx,
|
struct nvgpu_gr_ctx *gr_ctx,
|
||||||
u64 addr, u32 size, bool patch);
|
u64 addr, u32 size, bool patch);
|
||||||
u32 gr_gp10b_pagepool_default_size(struct gk20a *g);
|
u32 gr_gp10b_pagepool_default_size(struct gk20a *g);
|
||||||
u32 gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g);
|
|
||||||
void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data);
|
void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data);
|
||||||
void gr_gp10b_set_bes_crop_debug4(struct gk20a *g, u32 data);
|
void gr_gp10b_set_bes_crop_debug4(struct gk20a *g, u32 data);
|
||||||
int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
|
int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
|
||||||
u32 class_num, u32 offset, u32 data);
|
u32 class_num, u32 offset, u32 data);
|
||||||
void gr_gp10b_cb_size_default(struct gk20a *g);
|
|
||||||
void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
|
void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
|
||||||
void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data);
|
void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data);
|
||||||
int gr_gp10b_init_ctx_state(struct gk20a *g);
|
int gr_gp10b_init_ctx_state(struct gk20a *g);
|
||||||
|
|||||||
@@ -254,10 +254,6 @@ static const struct gpu_ops gp10b_ops = {
|
|||||||
.gr = {
|
.gr = {
|
||||||
.get_patch_slots = gr_gk20a_get_patch_slots,
|
.get_patch_slots = gr_gk20a_get_patch_slots,
|
||||||
.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
|
.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
|
||||||
.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
|
|
||||||
.cb_size_default = gr_gp10b_cb_size_default,
|
|
||||||
.calc_global_ctx_buffer_size =
|
|
||||||
gr_gp10b_calc_global_ctx_buffer_size,
|
|
||||||
.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
|
.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
|
||||||
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
||||||
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
||||||
@@ -513,6 +509,26 @@ static const struct gpu_ops gp10b_ops = {
|
|||||||
.load_method_init = gm20b_gr_init_load_method_init,
|
.load_method_init = gm20b_gr_init_load_method_init,
|
||||||
.commit_global_timeslice =
|
.commit_global_timeslice =
|
||||||
gm20b_gr_init_commit_global_timeslice,
|
gm20b_gr_init_commit_global_timeslice,
|
||||||
|
.get_bundle_cb_default_size =
|
||||||
|
gm20b_gr_init_get_bundle_cb_default_size,
|
||||||
|
.get_min_gpm_fifo_depth =
|
||||||
|
gm20b_gr_init_get_min_gpm_fifo_depth,
|
||||||
|
.get_bundle_cb_token_limit =
|
||||||
|
gm20b_gr_init_get_bundle_cb_token_limit,
|
||||||
|
.get_attrib_cb_default_size =
|
||||||
|
gp10b_gr_init_get_attrib_cb_default_size,
|
||||||
|
.get_alpha_cb_default_size =
|
||||||
|
gp10b_gr_init_get_alpha_cb_default_size,
|
||||||
|
.get_attrib_cb_gfxp_default_size =
|
||||||
|
gp10b_gr_init_get_attrib_cb_gfxp_default_size,
|
||||||
|
.get_attrib_cb_gfxp_size =
|
||||||
|
gp10b_gr_init_get_attrib_cb_gfxp_size,
|
||||||
|
.get_attrib_cb_size =
|
||||||
|
gp10b_gr_init_get_attrib_cb_size,
|
||||||
|
.get_alpha_cb_size =
|
||||||
|
gp10b_gr_init_get_alpha_cb_size,
|
||||||
|
.get_global_attr_cb_size =
|
||||||
|
gp10b_gr_init_get_global_attr_cb_size,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
.fb = {
|
.fb = {
|
||||||
|
|||||||
@@ -191,30 +191,6 @@ free_resources:
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gr_gv100_bundle_cb_defaults(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
|
|
||||||
gr->bundle_cb_default_size =
|
|
||||||
gr_scc_bundle_cb_size_div_256b__prod_v();
|
|
||||||
gr->min_gpm_fifo_depth =
|
|
||||||
gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
|
|
||||||
gr->bundle_cb_token_limit =
|
|
||||||
gr_pd_ab_dist_cfg2_token_limit_init_v();
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_gv100_cb_size_default(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
|
|
||||||
if (gr->attrib_cb_default_size == 0U) {
|
|
||||||
gr->attrib_cb_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
|
|
||||||
}
|
|
||||||
gr->alpha_cb_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
|
void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,8 +25,6 @@
|
|||||||
#ifndef NVGPU_GR_GV100_H
|
#ifndef NVGPU_GR_GV100_H
|
||||||
#define NVGPU_GR_GV100_H
|
#define NVGPU_GR_GV100_H
|
||||||
|
|
||||||
void gr_gv100_bundle_cb_defaults(struct gk20a *g);
|
|
||||||
void gr_gv100_cb_size_default(struct gk20a *g);
|
|
||||||
void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
|
void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
|
||||||
int gr_gv100_init_sm_id_table(struct gk20a *g);
|
int gr_gv100_init_sm_id_table(struct gk20a *g);
|
||||||
void gr_gv100_program_sm_id_numbering(struct gk20a *g,
|
void gr_gv100_program_sm_id_numbering(struct gk20a *g,
|
||||||
|
|||||||
@@ -58,6 +58,7 @@
|
|||||||
#include "hal/gr/zbc/zbc_gv11b.h"
|
#include "hal/gr/zbc/zbc_gv11b.h"
|
||||||
#include "hal/gr/init/gr_init_gm20b.h"
|
#include "hal/gr/init/gr_init_gm20b.h"
|
||||||
#include "hal/gr/init/gr_init_gp10b.h"
|
#include "hal/gr/init/gr_init_gp10b.h"
|
||||||
|
#include "hal/gr/init/gr_init_gv100.h"
|
||||||
#include "hal/gr/init/gr_init_gv11b.h"
|
#include "hal/gr/init/gr_init_gv11b.h"
|
||||||
#include "hal/gr/hwpm_map/hwpm_map_gv100.h"
|
#include "hal/gr/hwpm_map/hwpm_map_gv100.h"
|
||||||
#include "hal/falcon/falcon_gk20a.h"
|
#include "hal/falcon/falcon_gk20a.h"
|
||||||
@@ -361,10 +362,6 @@ static const struct gpu_ops gv100_ops = {
|
|||||||
.gr = {
|
.gr = {
|
||||||
.get_patch_slots = gr_gv100_get_patch_slots,
|
.get_patch_slots = gr_gv100_get_patch_slots,
|
||||||
.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
|
.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
|
||||||
.bundle_cb_defaults = gr_gv100_bundle_cb_defaults,
|
|
||||||
.cb_size_default = gr_gv100_cb_size_default,
|
|
||||||
.calc_global_ctx_buffer_size =
|
|
||||||
gr_gv11b_calc_global_ctx_buffer_size,
|
|
||||||
.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
|
.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
|
||||||
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
||||||
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
||||||
@@ -651,6 +648,26 @@ static const struct gpu_ops gv100_ops = {
|
|||||||
.load_method_init = gm20b_gr_init_load_method_init,
|
.load_method_init = gm20b_gr_init_load_method_init,
|
||||||
.commit_global_timeslice =
|
.commit_global_timeslice =
|
||||||
gv11b_gr_init_commit_global_timeslice,
|
gv11b_gr_init_commit_global_timeslice,
|
||||||
|
.get_bundle_cb_default_size =
|
||||||
|
gv100_gr_init_get_bundle_cb_default_size,
|
||||||
|
.get_min_gpm_fifo_depth =
|
||||||
|
gv100_gr_init_get_min_gpm_fifo_depth,
|
||||||
|
.get_bundle_cb_token_limit =
|
||||||
|
gv100_gr_init_get_bundle_cb_token_limit,
|
||||||
|
.get_attrib_cb_default_size =
|
||||||
|
gv100_gr_init_get_attrib_cb_default_size,
|
||||||
|
.get_alpha_cb_default_size =
|
||||||
|
gv100_gr_init_get_alpha_cb_default_size,
|
||||||
|
.get_attrib_cb_gfxp_default_size =
|
||||||
|
gv100_gr_init_get_attrib_cb_gfxp_default_size,
|
||||||
|
.get_attrib_cb_gfxp_size =
|
||||||
|
gv100_gr_init_get_attrib_cb_gfxp_size,
|
||||||
|
.get_attrib_cb_size =
|
||||||
|
gv11b_gr_init_get_attrib_cb_size,
|
||||||
|
.get_alpha_cb_size =
|
||||||
|
gv11b_gr_init_get_alpha_cb_size,
|
||||||
|
.get_global_attr_cb_size =
|
||||||
|
gv11b_gr_init_get_global_attr_cb_size,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
.fb = {
|
.fb = {
|
||||||
|
|||||||
@@ -1158,34 +1158,6 @@ u32 gr_gv11b_pagepool_default_size(struct gk20a *g)
|
|||||||
return gr_scc_pagepool_total_pages_hwmax_value_v();
|
return gr_scc_pagepool_total_pages_hwmax_value_v();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
u32 size;
|
|
||||||
|
|
||||||
gr->attrib_cb_size = gr->attrib_cb_default_size;
|
|
||||||
gr->alpha_cb_size = gr->alpha_cb_default_size;
|
|
||||||
|
|
||||||
gr->attrib_cb_size = min(gr->attrib_cb_size,
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) /
|
|
||||||
nvgpu_gr_config_get_tpc_count(gr->config));
|
|
||||||
gr->alpha_cb_size = min(gr->alpha_cb_size,
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) /
|
|
||||||
nvgpu_gr_config_get_tpc_count(gr->config));
|
|
||||||
|
|
||||||
size = gr->attrib_cb_size *
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
|
|
||||||
nvgpu_gr_config_get_max_tpc_count(gr->config);
|
|
||||||
|
|
||||||
size += gr->alpha_cb_size *
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
|
|
||||||
nvgpu_gr_config_get_max_tpc_count(gr->config);
|
|
||||||
|
|
||||||
size = ALIGN(size, 128);
|
|
||||||
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_gv11b_set_go_idle_timeout(struct gk20a *g, u32 data)
|
void gr_gv11b_set_go_idle_timeout(struct gk20a *g, u32 data)
|
||||||
{
|
{
|
||||||
gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
|
gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
|
||||||
@@ -1355,45 +1327,19 @@ fail:
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gr_gv11b_bundle_cb_defaults(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
|
|
||||||
gr->bundle_cb_default_size =
|
|
||||||
gr_scc_bundle_cb_size_div_256b__prod_v();
|
|
||||||
gr->min_gpm_fifo_depth =
|
|
||||||
gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
|
|
||||||
gr->bundle_cb_token_limit =
|
|
||||||
gr_pd_ab_dist_cfg2_token_limit_init_v();
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_gv11b_cb_size_default(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
|
|
||||||
if (gr->attrib_cb_default_size == 0U) {
|
|
||||||
gr->attrib_cb_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
|
|
||||||
}
|
|
||||||
gr->alpha_cb_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
|
||||||
gr->attrib_cb_gfxp_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
|
||||||
gr->attrib_cb_gfxp_size =
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
|
void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
|
||||||
{
|
{
|
||||||
struct gr_gk20a *gr = &g->gr;
|
struct gr_gk20a *gr = &g->gr;
|
||||||
u32 gpc_index, ppc_index, stride, val;
|
u32 gpc_index, ppc_index, stride, val;
|
||||||
u32 pd_ab_max_output;
|
u32 pd_ab_max_output;
|
||||||
u32 alpha_cb_size = data * 4U;
|
u32 alpha_cb_size = data * 4U;
|
||||||
|
u32 alpha_cb_size_max = g->ops.gr.init.get_alpha_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config));
|
||||||
|
|
||||||
nvgpu_log_fn(g, " ");
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
if (alpha_cb_size > gr->alpha_cb_size) {
|
if (alpha_cb_size > alpha_cb_size_max) {
|
||||||
alpha_cb_size = gr->alpha_cb_size;
|
alpha_cb_size = alpha_cb_size_max;
|
||||||
}
|
}
|
||||||
|
|
||||||
gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(),
|
gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(),
|
||||||
@@ -1438,11 +1384,13 @@ void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data)
|
|||||||
struct gr_gk20a *gr = &g->gr;
|
struct gr_gk20a *gr = &g->gr;
|
||||||
u32 gpc_index, ppc_index, stride, val;
|
u32 gpc_index, ppc_index, stride, val;
|
||||||
u32 cb_size_steady = data * 4U, cb_size;
|
u32 cb_size_steady = data * 4U, cb_size;
|
||||||
|
u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(gr->config));
|
||||||
|
|
||||||
nvgpu_log_fn(g, " ");
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
if (cb_size_steady > gr->attrib_cb_size) {
|
if (cb_size_steady > attrib_cb_size) {
|
||||||
cb_size_steady = gr->attrib_cb_size;
|
cb_size_steady = attrib_cb_size;
|
||||||
}
|
}
|
||||||
if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) !=
|
if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) !=
|
||||||
gk20a_readl(g,
|
gk20a_readl(g,
|
||||||
@@ -1834,7 +1782,9 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
|
|||||||
if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) {
|
if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) {
|
||||||
attrBufferSize = U32(gr_ctx->betacb_ctxsw_buffer.size);
|
attrBufferSize = U32(gr_ctx->betacb_ctxsw_buffer.size);
|
||||||
} else {
|
} else {
|
||||||
attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g);
|
attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(g->gr.config),
|
||||||
|
nvgpu_gr_config_get_max_tpc_count(g->gr.config));
|
||||||
}
|
}
|
||||||
|
|
||||||
attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
|
attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
|
||||||
@@ -4441,24 +4391,31 @@ fail:
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gv11b_gr_get_ctx_spill_size(struct gk20a *g) {
|
u32 gv11b_gr_get_ctx_spill_size(struct gk20a *g)
|
||||||
|
{
|
||||||
return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
|
return gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
|
||||||
gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
|
gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gv11b_gr_get_ctx_pagepool_size(struct gk20a *g) {
|
u32 gv11b_gr_get_ctx_pagepool_size(struct gk20a *g)
|
||||||
|
{
|
||||||
return g->ops.gr.pagepool_default_size(g) *
|
return g->ops.gr.pagepool_default_size(g) *
|
||||||
gr_scc_pagepool_total_pages_byte_granularity_v();
|
gr_scc_pagepool_total_pages_byte_granularity_v();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gv11b_gr_get_ctx_betacb_size(struct gk20a *g) {
|
u32 gv11b_gr_get_ctx_betacb_size(struct gk20a *g)
|
||||||
return g->gr.attrib_cb_default_size +
|
{
|
||||||
|
return g->ops.gr.init.get_attrib_cb_default_size(g) +
|
||||||
(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
|
(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gv11b_gr_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size) {
|
u32 gv11b_gr_get_ctx_attrib_cb_size(struct gk20a *g, u32 betacb_size)
|
||||||
return (betacb_size + g->gr.alpha_cb_size) *
|
{
|
||||||
|
u32 alpha_cb_size = g->ops.gr.init.get_alpha_cb_size(g,
|
||||||
|
nvgpu_gr_config_get_tpc_count(g->gr.config));
|
||||||
|
|
||||||
|
return (betacb_size + alpha_cb_size) *
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
|
||||||
nvgpu_gr_config_get_max_tpc_count(g->gr.config);
|
nvgpu_gr_config_get_max_tpc_count(g->gr.config);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -97,11 +97,8 @@ void gr_gv11b_enable_gpc_exceptions(struct gk20a *g);
|
|||||||
int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||||
bool *post_event);
|
bool *post_event);
|
||||||
u32 gr_gv11b_pagepool_default_size(struct gk20a *g);
|
u32 gr_gv11b_pagepool_default_size(struct gk20a *g);
|
||||||
u32 gr_gv11b_calc_global_ctx_buffer_size(struct gk20a *g);
|
|
||||||
int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr,
|
int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr,
|
||||||
u32 class_num, u32 offset, u32 data);
|
u32 class_num, u32 offset, u32 data);
|
||||||
void gr_gv11b_bundle_cb_defaults(struct gk20a *g);
|
|
||||||
void gr_gv11b_cb_size_default(struct gk20a *g);
|
|
||||||
void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
|
void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
|
||||||
void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data);
|
void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data);
|
||||||
int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
|
int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
|
||||||
|
|||||||
@@ -312,10 +312,6 @@ static const struct gpu_ops gv11b_ops = {
|
|||||||
.gr = {
|
.gr = {
|
||||||
.get_patch_slots = gr_gv100_get_patch_slots,
|
.get_patch_slots = gr_gv100_get_patch_slots,
|
||||||
.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
|
.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
|
||||||
.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults,
|
|
||||||
.cb_size_default = gr_gv11b_cb_size_default,
|
|
||||||
.calc_global_ctx_buffer_size =
|
|
||||||
gr_gv11b_calc_global_ctx_buffer_size,
|
|
||||||
.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
|
.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
|
||||||
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
||||||
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
||||||
@@ -611,6 +607,26 @@ static const struct gpu_ops gv11b_ops = {
|
|||||||
.load_method_init = gm20b_gr_init_load_method_init,
|
.load_method_init = gm20b_gr_init_load_method_init,
|
||||||
.commit_global_timeslice =
|
.commit_global_timeslice =
|
||||||
gv11b_gr_init_commit_global_timeslice,
|
gv11b_gr_init_commit_global_timeslice,
|
||||||
|
.get_bundle_cb_default_size =
|
||||||
|
gv11b_gr_init_get_bundle_cb_default_size,
|
||||||
|
.get_min_gpm_fifo_depth =
|
||||||
|
gv11b_gr_init_get_min_gpm_fifo_depth,
|
||||||
|
.get_bundle_cb_token_limit =
|
||||||
|
gv11b_gr_init_get_bundle_cb_token_limit,
|
||||||
|
.get_attrib_cb_default_size =
|
||||||
|
gv11b_gr_init_get_attrib_cb_default_size,
|
||||||
|
.get_alpha_cb_default_size =
|
||||||
|
gv11b_gr_init_get_alpha_cb_default_size,
|
||||||
|
.get_attrib_cb_gfxp_default_size =
|
||||||
|
gv11b_gr_init_get_attrib_cb_gfxp_default_size,
|
||||||
|
.get_attrib_cb_gfxp_size =
|
||||||
|
gv11b_gr_init_get_attrib_cb_gfxp_size,
|
||||||
|
.get_attrib_cb_size =
|
||||||
|
gv11b_gr_init_get_attrib_cb_size,
|
||||||
|
.get_alpha_cb_size =
|
||||||
|
gv11b_gr_init_get_alpha_cb_size,
|
||||||
|
.get_global_attr_cb_size =
|
||||||
|
gv11b_gr_init_get_global_attr_cb_size,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
.fb = {
|
.fb = {
|
||||||
|
|||||||
@@ -685,3 +685,55 @@ void gm20b_gr_init_commit_global_timeslice(struct gk20a *g)
|
|||||||
mpc_vtg_debug, false);
|
mpc_vtg_debug, false);
|
||||||
nvgpu_gr_ctx_patch_write(g, NULL, gr_ds_debug_r(), ds_debug, false);
|
nvgpu_gr_ctx_patch_write(g, NULL, gr_ds_debug_r(), ds_debug, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 gm20b_gr_init_get_bundle_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_scc_bundle_cb_size_div_256b__prod_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gm20b_gr_init_get_min_gpm_fifo_depth(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gm20b_gr_init_get_bundle_cb_token_limit(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_pd_ab_dist_cfg2_token_limit_init_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gm20b_gr_init_get_attrib_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gm20b_gr_init_get_alpha_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gm20b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count)
|
||||||
|
{
|
||||||
|
return g->ops.gr.init.get_attrib_cb_default_size(g)
|
||||||
|
+ (g->ops.gr.init.get_attrib_cb_default_size(g) >> 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gm20b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count)
|
||||||
|
{
|
||||||
|
return g->ops.gr.init.get_alpha_cb_default_size(g)
|
||||||
|
+ (g->ops.gr.init.get_alpha_cb_default_size(g) >> 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gm20b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
|
||||||
|
u32 max_tpc)
|
||||||
|
{
|
||||||
|
u32 size;
|
||||||
|
|
||||||
|
size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) *
|
||||||
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
|
||||||
|
|
||||||
|
size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) *
|
||||||
|
gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc;
|
||||||
|
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -56,4 +56,14 @@ void gm20b_gr_init_load_method_init(struct gk20a *g,
|
|||||||
struct netlist_av_list *sw_method_init);
|
struct netlist_av_list *sw_method_init);
|
||||||
void gm20b_gr_init_commit_global_timeslice(struct gk20a *g);
|
void gm20b_gr_init_commit_global_timeslice(struct gk20a *g);
|
||||||
|
|
||||||
|
u32 gm20b_gr_init_get_bundle_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gm20b_gr_init_get_min_gpm_fifo_depth(struct gk20a *g);
|
||||||
|
u32 gm20b_gr_init_get_bundle_cb_token_limit(struct gk20a *g);
|
||||||
|
u32 gm20b_gr_init_get_attrib_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gm20b_gr_init_get_alpha_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gm20b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count);
|
||||||
|
u32 gm20b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count);
|
||||||
|
u32 gm20b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
|
||||||
|
u32 max_tpc);
|
||||||
|
|
||||||
#endif /* NVGPU_GR_INIT_GM20B_H */
|
#endif /* NVGPU_GR_INIT_GM20B_H */
|
||||||
|
|||||||
@@ -235,3 +235,55 @@ int gp10b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 gp10b_gr_init_get_attrib_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return 0x800;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gp10b_gr_init_get_alpha_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gp10b_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return g->ops.gr.init.get_attrib_cb_default_size(g) +
|
||||||
|
(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
|
||||||
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gp10b_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return g->ops.gr.init.get_attrib_cb_default_size(g) +
|
||||||
|
(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
|
||||||
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gp10b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count)
|
||||||
|
{
|
||||||
|
return min(g->ops.gr.init.get_attrib_cb_default_size(g),
|
||||||
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / tpc_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gp10b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count)
|
||||||
|
{
|
||||||
|
return min(g->ops.gr.init.get_alpha_cb_default_size(g),
|
||||||
|
gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / tpc_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gp10b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
|
||||||
|
u32 max_tpc)
|
||||||
|
{
|
||||||
|
u32 size;
|
||||||
|
|
||||||
|
size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) *
|
||||||
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
|
||||||
|
|
||||||
|
size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) *
|
||||||
|
gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc;
|
||||||
|
|
||||||
|
size = ALIGN(size, 128);
|
||||||
|
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -37,4 +37,13 @@ int gp10b_gr_init_fs_state(struct gk20a *g);
|
|||||||
int gp10b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count,
|
int gp10b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count,
|
||||||
bool gfxp_wfi_timeout_unit_usec);
|
bool gfxp_wfi_timeout_unit_usec);
|
||||||
|
|
||||||
|
u32 gp10b_gr_init_get_attrib_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gp10b_gr_init_get_alpha_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gp10b_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g);
|
||||||
|
u32 gp10b_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g);
|
||||||
|
u32 gp10b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count);
|
||||||
|
u32 gp10b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count);
|
||||||
|
u32 gp10b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
|
||||||
|
u32 max_tpc);
|
||||||
|
|
||||||
#endif /* NVGPU_GR_INIT_GP10B_H */
|
#endif /* NVGPU_GR_INIT_GP10B_H */
|
||||||
|
|||||||
64
drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.c
Normal file
64
drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.c
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <nvgpu/gk20a.h>
|
||||||
|
#include <nvgpu/gr/ctx.h>
|
||||||
|
|
||||||
|
#include "gr_init_gv100.h"
|
||||||
|
|
||||||
|
#include <nvgpu/hw/gv100/hw_gr_gv100.h>
|
||||||
|
|
||||||
|
u32 gv100_gr_init_get_bundle_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_scc_bundle_cb_size_div_256b__prod_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv100_gr_init_get_min_gpm_fifo_depth(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv100_gr_init_get_bundle_cb_token_limit(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_pd_ab_dist_cfg2_token_limit_init_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv100_gr_init_get_attrib_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv100_gr_init_get_alpha_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv100_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv100_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
||||||
|
}
|
||||||
|
|
||||||
38
drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.h
Normal file
38
drivers/gpu/nvgpu/hal/gr/init/gr_init_gv100.h
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef NVGPU_GR_INIT_GV100_H
|
||||||
|
#define NVGPU_GR_INIT_GV100_H
|
||||||
|
|
||||||
|
#include <nvgpu/types.h>
|
||||||
|
|
||||||
|
struct gk20a;
|
||||||
|
|
||||||
|
u32 gv100_gr_init_get_bundle_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gv100_gr_init_get_min_gpm_fifo_depth(struct gk20a *g);
|
||||||
|
u32 gv100_gr_init_get_bundle_cb_token_limit(struct gk20a *g);
|
||||||
|
u32 gv100_gr_init_get_attrib_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gv100_gr_init_get_alpha_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gv100_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g);
|
||||||
|
u32 gv100_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g);
|
||||||
|
|
||||||
|
#endif /* NVGPU_GR_INIT_GV100_H */
|
||||||
@@ -377,3 +377,67 @@ void gv11b_gr_init_commit_global_timeslice(struct gk20a *g)
|
|||||||
mpc_vtg_debug, false);
|
mpc_vtg_debug, false);
|
||||||
nvgpu_gr_ctx_patch_write(g, NULL, gr_ds_debug_r(), ds_debug, false);
|
nvgpu_gr_ctx_patch_write(g, NULL, gr_ds_debug_r(), ds_debug, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_bundle_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_scc_bundle_cb_size_div_256b__prod_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_min_gpm_fifo_depth(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_bundle_cb_token_limit(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_pd_ab_dist_cfg2_token_limit_init_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_attrib_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_alpha_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count)
|
||||||
|
{
|
||||||
|
return min(g->ops.gr.init.get_attrib_cb_default_size(g),
|
||||||
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~U32(0U)) / tpc_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count)
|
||||||
|
{
|
||||||
|
return min(g->ops.gr.init.get_alpha_cb_default_size(g),
|
||||||
|
gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~U32(0U)) / tpc_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
|
||||||
|
u32 max_tpc)
|
||||||
|
{
|
||||||
|
u32 size;
|
||||||
|
|
||||||
|
size = g->ops.gr.init.get_attrib_cb_size(g, tpc_count) *
|
||||||
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * max_tpc;
|
||||||
|
|
||||||
|
size += g->ops.gr.init.get_alpha_cb_size(g, tpc_count) *
|
||||||
|
gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * max_tpc;
|
||||||
|
|
||||||
|
size = ALIGN(size, 128);
|
||||||
|
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -42,4 +42,16 @@ int gv11b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count,
|
|||||||
bool gfxp_wfi_timeout_unit_usec);
|
bool gfxp_wfi_timeout_unit_usec);
|
||||||
void gv11b_gr_init_commit_global_timeslice(struct gk20a *g);
|
void gv11b_gr_init_commit_global_timeslice(struct gk20a *g);
|
||||||
|
|
||||||
|
u32 gv11b_gr_init_get_bundle_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gv11b_gr_init_get_min_gpm_fifo_depth(struct gk20a *g);
|
||||||
|
u32 gv11b_gr_init_get_bundle_cb_token_limit(struct gk20a *g);
|
||||||
|
u32 gv11b_gr_init_get_attrib_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gv11b_gr_init_get_alpha_cb_default_size(struct gk20a *g);
|
||||||
|
u32 gv11b_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g);
|
||||||
|
u32 gv11b_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g);
|
||||||
|
u32 gv11b_gr_init_get_attrib_cb_size(struct gk20a *g, u32 tpc_count);
|
||||||
|
u32 gv11b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count);
|
||||||
|
u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
|
||||||
|
u32 max_tpc);
|
||||||
|
|
||||||
#endif /* NVGPU_GR_INIT_GV11B_H */
|
#endif /* NVGPU_GR_INIT_GV11B_H */
|
||||||
|
|||||||
@@ -89,3 +89,39 @@ void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g,
|
|||||||
tu104_gr_init_patch_rtv_cb(g, gr_ctx, addr,
|
tu104_gr_init_patch_rtv_cb(g, gr_ctx, addr,
|
||||||
rtv_cb_size, gfxp_addr_size, patch);
|
rtv_cb_size, gfxp_addr_size, patch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 tu104_gr_init_get_bundle_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_scc_bundle_cb_size_div_256b__prod_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 tu104_gr_init_get_min_gpm_fifo_depth(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 tu104_gr_init_get_bundle_cb_token_limit(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_pd_ab_dist_cfg2_token_limit_init_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 tu104_gr_init_get_attrib_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 tu104_gr_init_get_alpha_cb_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 tu104_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 tu104_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g)
|
||||||
|
{
|
||||||
|
return gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -34,4 +34,12 @@ void tu104_gr_init_commit_rtv_cb(struct gk20a *g, u64 addr,
|
|||||||
void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g,
|
void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g,
|
||||||
struct nvgpu_gr_ctx *gr_ctx, bool patch);
|
struct nvgpu_gr_ctx *gr_ctx, bool patch);
|
||||||
|
|
||||||
|
u32 tu104_gr_init_get_bundle_cb_default_size(struct gk20a *g);
|
||||||
|
u32 tu104_gr_init_get_min_gpm_fifo_depth(struct gk20a *g);
|
||||||
|
u32 tu104_gr_init_get_bundle_cb_token_limit(struct gk20a *g);
|
||||||
|
u32 tu104_gr_init_get_attrib_cb_default_size(struct gk20a *g);
|
||||||
|
u32 tu104_gr_init_get_alpha_cb_default_size(struct gk20a *g);
|
||||||
|
u32 tu104_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g);
|
||||||
|
u32 tu104_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g);
|
||||||
|
|
||||||
#endif /* NVGPU_GR_INIT_TU104_H */
|
#endif /* NVGPU_GR_INIT_TU104_H */
|
||||||
|
|||||||
@@ -256,9 +256,6 @@ struct gpu_ops {
|
|||||||
struct {
|
struct {
|
||||||
u32 (*get_patch_slots)(struct gk20a *g);
|
u32 (*get_patch_slots)(struct gk20a *g);
|
||||||
void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset);
|
void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset);
|
||||||
void (*bundle_cb_defaults)(struct gk20a *g);
|
|
||||||
void (*cb_size_default)(struct gk20a *g);
|
|
||||||
u32 (*calc_global_ctx_buffer_size)(struct gk20a *g);
|
|
||||||
void (*commit_global_attrib_cb)(struct gk20a *g,
|
void (*commit_global_attrib_cb)(struct gk20a *g,
|
||||||
struct nvgpu_gr_ctx *ch_ctx,
|
struct nvgpu_gr_ctx *ch_ctx,
|
||||||
u64 addr, bool patch);
|
u64 addr, bool patch);
|
||||||
@@ -699,6 +696,19 @@ struct gpu_ops {
|
|||||||
struct nvgpu_gr_ctx *gr_ctx, bool patch);
|
struct nvgpu_gr_ctx *gr_ctx, bool patch);
|
||||||
void (*commit_gfxp_rtv_cb)(struct gk20a *g,
|
void (*commit_gfxp_rtv_cb)(struct gk20a *g,
|
||||||
struct nvgpu_gr_ctx *gr_ctx, bool patch);
|
struct nvgpu_gr_ctx *gr_ctx, bool patch);
|
||||||
|
u32 (*get_bundle_cb_default_size)(struct gk20a *g);
|
||||||
|
u32 (*get_min_gpm_fifo_depth)(struct gk20a *g);
|
||||||
|
u32 (*get_bundle_cb_token_limit)(struct gk20a *g);
|
||||||
|
u32 (*get_attrib_cb_default_size)(struct gk20a *g);
|
||||||
|
u32 (*get_alpha_cb_default_size)(struct gk20a *g);
|
||||||
|
u32 (*get_attrib_cb_gfxp_default_size)(struct gk20a *g);
|
||||||
|
u32 (*get_attrib_cb_gfxp_size)(struct gk20a *g);
|
||||||
|
u32 (*get_attrib_cb_size)(struct gk20a *g,
|
||||||
|
u32 tpc_count);
|
||||||
|
u32 (*get_alpha_cb_size)(struct gk20a *g,
|
||||||
|
u32 tpc_count);
|
||||||
|
u32 (*get_global_attr_cb_size)(struct gk20a *g,
|
||||||
|
u32 tpc_count, u32 max_tpc);
|
||||||
} init;
|
} init;
|
||||||
|
|
||||||
u32 (*get_ctxsw_checksum_mismatch_mailbox_val)(void);
|
u32 (*get_ctxsw_checksum_mismatch_mailbox_val)(void);
|
||||||
|
|||||||
@@ -17,14 +17,39 @@
|
|||||||
|
|
||||||
#include <linux/debugfs.h>
|
#include <linux/debugfs.h>
|
||||||
|
|
||||||
int gr_gk20a_debugfs_init(struct gk20a *g)
|
static int gr_default_attrib_cb_size_show(struct seq_file *s, void *data)
|
||||||
{
|
{
|
||||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
struct gk20a *g = s->private;
|
||||||
|
|
||||||
l->debugfs_gr_default_attrib_cb_size =
|
seq_printf(s, "%u\n", g->ops.gr.init.get_attrib_cb_default_size(g));
|
||||||
debugfs_create_u32("gr_default_attrib_cb_size",
|
|
||||||
S_IRUGO|S_IWUSR, l->debugfs,
|
return 0;
|
||||||
&g->gr.attrib_cb_default_size);
|
}
|
||||||
|
|
||||||
|
static int gr_default_attrib_cb_size_open(struct inode *inode,
|
||||||
|
struct file *file)
|
||||||
|
{
|
||||||
|
return single_open(file, gr_default_attrib_cb_size_show,
|
||||||
|
inode->i_private);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct file_operations gr_default_attrib_cb_size_fops= {
|
||||||
|
.open = gr_default_attrib_cb_size_open,
|
||||||
|
.read = seq_read,
|
||||||
|
.llseek = seq_lseek,
|
||||||
|
.release = single_release,
|
||||||
|
};
|
||||||
|
|
||||||
|
int gr_gk20a_debugfs_init(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||||
|
struct dentry *d;
|
||||||
|
|
||||||
|
d = debugfs_create_file(
|
||||||
|
"gr_default_attrib_cb_size", S_IRUGO, l->debugfs, g,
|
||||||
|
&gr_default_attrib_cb_size_fops);
|
||||||
|
if (!d)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -146,7 +146,6 @@ struct nvgpu_os_linux {
|
|||||||
struct dentry *debugfs_timeouts_enabled;
|
struct dentry *debugfs_timeouts_enabled;
|
||||||
struct dentry *debugfs_gr_idle_timeout_default;
|
struct dentry *debugfs_gr_idle_timeout_default;
|
||||||
struct dentry *debugfs_disable_bigpage;
|
struct dentry *debugfs_disable_bigpage;
|
||||||
struct dentry *debugfs_gr_default_attrib_cb_size;
|
|
||||||
|
|
||||||
struct dentry *debugfs_timeslice_low_priority_us;
|
struct dentry *debugfs_timeslice_low_priority_us;
|
||||||
struct dentry *debugfs_timeslice_medium_priority_us;
|
struct dentry *debugfs_timeslice_medium_priority_us;
|
||||||
|
|||||||
@@ -143,34 +143,6 @@ int gr_tu104_init_gfxp_rtv_cb(struct gk20a *g,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gr_tu104_bundle_cb_defaults(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
|
|
||||||
gr->bundle_cb_default_size =
|
|
||||||
gr_scc_bundle_cb_size_div_256b__prod_v();
|
|
||||||
gr->min_gpm_fifo_depth =
|
|
||||||
gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
|
|
||||||
gr->bundle_cb_token_limit =
|
|
||||||
gr_pd_ab_dist_cfg2_token_limit_init_v();
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_tu104_cb_size_default(struct gk20a *g)
|
|
||||||
{
|
|
||||||
struct gr_gk20a *gr = &g->gr;
|
|
||||||
|
|
||||||
if (gr->attrib_cb_default_size == 0U) {
|
|
||||||
gr->attrib_cb_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
|
|
||||||
}
|
|
||||||
gr->alpha_cb_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
|
||||||
gr->attrib_cb_gfxp_default_size =
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
|
||||||
gr->attrib_cb_gfxp_size =
|
|
||||||
gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
|
||||||
}
|
|
||||||
|
|
||||||
void gr_tu104_enable_gpc_exceptions(struct gk20a *g)
|
void gr_tu104_enable_gpc_exceptions(struct gk20a *g)
|
||||||
{
|
{
|
||||||
struct gr_gk20a *gr = &g->gr;
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
|||||||
@@ -63,9 +63,6 @@ int gr_tu104_init_sw_bundle64(struct gk20a *g);
|
|||||||
void gr_tu10x_create_sysfs(struct gk20a *g);
|
void gr_tu10x_create_sysfs(struct gk20a *g);
|
||||||
void gr_tu10x_remove_sysfs(struct gk20a *g);
|
void gr_tu10x_remove_sysfs(struct gk20a *g);
|
||||||
|
|
||||||
void gr_tu104_bundle_cb_defaults(struct gk20a *g);
|
|
||||||
void gr_tu104_cb_size_default(struct gk20a *g);
|
|
||||||
|
|
||||||
void gr_tu104_enable_gpc_exceptions(struct gk20a *g);
|
void gr_tu104_enable_gpc_exceptions(struct gk20a *g);
|
||||||
|
|
||||||
int gr_tu104_get_offset_in_gpccs_segment(struct gk20a *g,
|
int gr_tu104_get_offset_in_gpccs_segment(struct gk20a *g,
|
||||||
|
|||||||
@@ -382,10 +382,6 @@ static const struct gpu_ops tu104_ops = {
|
|||||||
.gr = {
|
.gr = {
|
||||||
.get_patch_slots = gr_gv100_get_patch_slots,
|
.get_patch_slots = gr_gv100_get_patch_slots,
|
||||||
.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
|
.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
|
||||||
.bundle_cb_defaults = gr_tu104_bundle_cb_defaults,
|
|
||||||
.cb_size_default = gr_tu104_cb_size_default,
|
|
||||||
.calc_global_ctx_buffer_size =
|
|
||||||
gr_gv11b_calc_global_ctx_buffer_size,
|
|
||||||
.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
|
.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
|
||||||
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
|
||||||
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
|
||||||
@@ -684,6 +680,26 @@ static const struct gpu_ops tu104_ops = {
|
|||||||
.get_rtv_cb_size = tu104_gr_init_get_rtv_cb_size,
|
.get_rtv_cb_size = tu104_gr_init_get_rtv_cb_size,
|
||||||
.commit_rtv_cb = tu104_gr_init_commit_rtv_cb,
|
.commit_rtv_cb = tu104_gr_init_commit_rtv_cb,
|
||||||
.commit_gfxp_rtv_cb = tu104_gr_init_commit_gfxp_rtv_cb,
|
.commit_gfxp_rtv_cb = tu104_gr_init_commit_gfxp_rtv_cb,
|
||||||
|
.get_bundle_cb_default_size =
|
||||||
|
tu104_gr_init_get_bundle_cb_default_size,
|
||||||
|
.get_min_gpm_fifo_depth =
|
||||||
|
tu104_gr_init_get_min_gpm_fifo_depth,
|
||||||
|
.get_bundle_cb_token_limit =
|
||||||
|
tu104_gr_init_get_bundle_cb_token_limit,
|
||||||
|
.get_attrib_cb_default_size =
|
||||||
|
tu104_gr_init_get_attrib_cb_default_size,
|
||||||
|
.get_alpha_cb_default_size =
|
||||||
|
tu104_gr_init_get_alpha_cb_default_size,
|
||||||
|
.get_attrib_cb_gfxp_default_size =
|
||||||
|
tu104_gr_init_get_attrib_cb_gfxp_default_size,
|
||||||
|
.get_attrib_cb_gfxp_size =
|
||||||
|
tu104_gr_init_get_attrib_cb_gfxp_size,
|
||||||
|
.get_attrib_cb_size =
|
||||||
|
gv11b_gr_init_get_attrib_cb_size,
|
||||||
|
.get_alpha_cb_size =
|
||||||
|
gv11b_gr_init_get_alpha_cb_size,
|
||||||
|
.get_global_attr_cb_size =
|
||||||
|
gv11b_gr_init_get_global_attr_cb_size,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
.fb = {
|
.fb = {
|
||||||
|
|||||||
Reference in New Issue
Block a user