gpu: nvgpu: Reorg gr HAL initialization

Reorganize HAL initialization to remove inheritance and construct
the gpu_ops struct at compile time. This patch only covers the
gr sub-module of the gpu_ops struct.

Perform HAL function assignments in hal_gxxxx.c through the
population of a chip-specific copy of gpu_ops.

Jira NVGPU-74

Change-Id: Ie37638f442fd68aca8a7ade5f297118447bdc91e
Signed-off-by: Sunny He <suhe@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1542989
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
This commit is contained in:
Sunny He
2017-08-17 16:11:34 -07:00
committed by mobile promotions
parent 82ba1277f3
commit 4b5b67d6d8
9 changed files with 649 additions and 295 deletions

View File

@@ -36,7 +36,7 @@
#include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h> #include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h> #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
static void gr_gm20b_init_gpc_mmu(struct gk20a *g) void gr_gm20b_init_gpc_mmu(struct gk20a *g)
{ {
u32 temp; u32 temp;
@@ -73,7 +73,7 @@ static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
gk20a_readl(g, fb_fbhub_num_active_ltcs_r())); gk20a_readl(g, fb_fbhub_num_active_ltcs_r()));
} }
static void gr_gm20b_bundle_cb_defaults(struct gk20a *g) void gr_gm20b_bundle_cb_defaults(struct gk20a *g)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
@@ -85,7 +85,7 @@ static void gr_gm20b_bundle_cb_defaults(struct gk20a *g)
gr_pd_ab_dist_cfg2_token_limit_init_v(); gr_pd_ab_dist_cfg2_token_limit_init_v();
} }
static void gr_gm20b_cb_size_default(struct gk20a *g) void gr_gm20b_cb_size_default(struct gk20a *g)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
@@ -96,7 +96,7 @@ static void gr_gm20b_cb_size_default(struct gk20a *g)
gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
} }
static int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
int size; int size;
@@ -134,7 +134,7 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
} }
static void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_ctx_gk20a *ch_ctx,
u64 addr, u64 size, bool patch) u64 addr, u64 size, bool patch)
{ {
@@ -170,7 +170,7 @@ static void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
} }
static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
struct channel_gk20a *c, bool patch) struct channel_gk20a *c, bool patch)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
@@ -250,7 +250,7 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
return 0; return 0;
} }
static void gr_gm20b_commit_global_pagepool(struct gk20a *g, void gr_gm20b_commit_global_pagepool(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_ctx_gk20a *ch_ctx,
u64 addr, u32 size, bool patch) u64 addr, u32 size, bool patch)
{ {
@@ -276,7 +276,7 @@ void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data)
gk20a_dbg_fn("done"); gk20a_dbg_fn("done");
} }
static int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data) u32 class_num, u32 offset, u32 data)
{ {
gk20a_dbg_fn(""); gk20a_dbg_fn("");
@@ -318,7 +318,7 @@ fail:
return -EINVAL; return -EINVAL;
} }
static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
u32 gpc_index, ppc_index, stride, val; u32 gpc_index, ppc_index, stride, val;
@@ -368,7 +368,7 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
} }
} }
static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
u32 gpc_index, ppc_index, stride, val; u32 gpc_index, ppc_index, stride, val;
@@ -423,7 +423,7 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
} }
} }
static void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g) void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g)
{ {
/* setup sm warp esr report masks */ /* setup sm warp esr report masks */
gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
@@ -456,7 +456,7 @@ static void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g)
gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f()); gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f());
} }
static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
{ {
bool valid = false; bool valid = false;
@@ -476,7 +476,7 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
return valid; return valid;
} }
static bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num) bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
{ {
if (class_num == MAXWELL_B) if (class_num == MAXWELL_B)
return true; return true;
@@ -484,7 +484,7 @@ static bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
return false; return false;
} }
static bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num) bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num)
{ {
if (class_num == MAXWELL_COMPUTE_B) if (class_num == MAXWELL_COMPUTE_B)
return true; return true;
@@ -502,7 +502,7 @@ static const u32 _num_sm_dsm_perf_ctrl_regs = 2;
static u32 *_sm_dsm_perf_regs; static u32 *_sm_dsm_perf_regs;
static u32 _sm_dsm_perf_ctrl_regs[2]; static u32 _sm_dsm_perf_ctrl_regs[2];
static void gr_gm20b_init_sm_dsm_reg_info(void) void gr_gm20b_init_sm_dsm_reg_info(void)
{ {
if (_sm_dsm_perf_ctrl_regs[0] != 0) if (_sm_dsm_perf_ctrl_regs[0] != 0)
return; return;
@@ -513,7 +513,7 @@ static void gr_gm20b_init_sm_dsm_reg_info(void)
gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(); gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
} }
static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
u32 *num_sm_dsm_perf_regs, u32 *num_sm_dsm_perf_regs,
u32 **sm_dsm_perf_regs, u32 **sm_dsm_perf_regs,
u32 *perf_register_stride) u32 *perf_register_stride)
@@ -523,7 +523,7 @@ static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
*perf_register_stride = 0; *perf_register_stride = 0;
} }
static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
u32 *num_sm_dsm_perf_ctrl_regs, u32 *num_sm_dsm_perf_ctrl_regs,
u32 **sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs,
u32 *ctrl_register_stride) u32 *ctrl_register_stride)
@@ -535,7 +535,7 @@ static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
} }
static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
{ {
u32 val; u32 val;
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
@@ -546,7 +546,7 @@ static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1); return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1);
} }
static void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
{ {
nvgpu_tegra_fuse_write_bypass(g, 0x1); nvgpu_tegra_fuse_write_bypass(g, 0x1);
nvgpu_tegra_fuse_write_access_sw(g, 0x0); nvgpu_tegra_fuse_write_access_sw(g, 0x0);
@@ -563,7 +563,7 @@ static void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
} }
} }
static void gr_gm20b_load_tpc_mask(struct gk20a *g) void gr_gm20b_load_tpc_mask(struct gk20a *g)
{ {
u32 pes_tpc_mask = 0, fuse_tpc_mask; u32 pes_tpc_mask = 0, fuse_tpc_mask;
u32 gpc, pes; u32 gpc, pes;
@@ -588,7 +588,7 @@ static void gr_gm20b_load_tpc_mask(struct gk20a *g)
} }
} }
static void gr_gm20b_program_sm_id_numbering(struct gk20a *g, void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
u32 gpc, u32 tpc, u32 smid) u32 gpc, u32 tpc, u32 smid)
{ {
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
@@ -604,7 +604,7 @@ static void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
gr_gpc0_tpc0_pe_cfg_smid_value_f(smid)); gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
} }
static int gr_gm20b_load_smid_config(struct gk20a *g) int gr_gm20b_load_smid_config(struct gk20a *g)
{ {
u32 *tpc_sm_id; u32 *tpc_sm_id;
u32 i, j; u32 i, j;
@@ -669,7 +669,7 @@ int gr_gm20b_init_fs_state(struct gk20a *g)
return 0; return 0;
} }
static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{ {
gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
@@ -697,7 +697,7 @@ static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr)
tpc_in_gpc_stride)); tpc_in_gpc_stride));
} }
static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr) bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr)
{ {
u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
@@ -708,7 +708,7 @@ static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr)
|| gr_gm20b_is_tpc_addr_shared(g, addr); || gr_gm20b_is_tpc_addr_shared(g, addr);
} }
static u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr) u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
{ {
u32 i, start; u32 i, start;
u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
@@ -738,7 +738,7 @@ static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
gr_fecs_falcon_hwcfg_r()); gr_fecs_falcon_hwcfg_r());
} }
static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
{ {
u32 err, flags; u32 err, flags;
u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() - u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
@@ -819,14 +819,14 @@ static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
} }
#else #else
static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
{ {
return -EPERM; return -EPERM;
} }
#endif #endif
static void gr_gm20b_detect_sm_arch(struct gk20a *g) void gr_gm20b_detect_sm_arch(struct gk20a *g)
{ {
u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
@@ -838,12 +838,12 @@ static void gr_gm20b_detect_sm_arch(struct gk20a *g)
gr_gpc0_tpc0_sm_arch_warp_count_v(v); gr_gpc0_tpc0_sm_arch_warp_count_v(v);
} }
static u32 gr_gm20b_pagepool_default_size(struct gk20a *g) u32 gr_gm20b_pagepool_default_size(struct gk20a *g)
{ {
return gr_scc_pagepool_total_pages_hwmax_value_v(); return gr_scc_pagepool_total_pages_hwmax_value_v();
} }
static int gr_gm20b_alloc_gr_ctx(struct gk20a *g, int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags) u32 flags)
@@ -864,7 +864,7 @@ static int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
return 0; return 0;
} }
static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_ctx_gk20a *ch_ctx,
struct nvgpu_mem *mem) struct nvgpu_mem *mem)
{ {
@@ -884,7 +884,7 @@ static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
gk20a_dbg_fn("done"); gk20a_dbg_fn("done");
} }
static int gr_gm20b_dump_gr_status_regs(struct gk20a *g, int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o) struct gk20a_debug_output *o)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
@@ -1022,7 +1022,7 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
return 0; return 0;
} }
static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
bool enable) bool enable)
{ {
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
@@ -1051,7 +1051,7 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
return 0; return 0;
} }
static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g) u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
{ {
u32 fbp_en_mask, opt_fbio; u32 fbp_en_mask, opt_fbio;
u32 tmp, max_fbps_count; u32 tmp, max_fbps_count;
@@ -1066,7 +1066,7 @@ static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
return fbp_en_mask; return fbp_en_mask;
} }
static u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g) u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g)
{ {
u32 ltc_per_fbp, reg; u32 ltc_per_fbp, reg;
reg = gk20a_readl(g, top_ltc_per_fbp_r()); reg = gk20a_readl(g, top_ltc_per_fbp_r());
@@ -1074,7 +1074,7 @@ static u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g)
return ltc_per_fbp; return ltc_per_fbp;
} }
static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g) u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
{ {
u32 lts_per_ltc, reg; u32 lts_per_ltc, reg;
reg = gk20a_readl(g, top_slices_per_ltc_r()); reg = gk20a_readl(g, top_slices_per_ltc_r());
@@ -1082,7 +1082,7 @@ static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
return lts_per_ltc; return lts_per_ltc;
} }
static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g) u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
u32 i, tmp, max_fbps_count, max_ltc_per_fbp; u32 i, tmp, max_fbps_count, max_ltc_per_fbp;
@@ -1102,7 +1102,7 @@ static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
return gr->fbp_rop_l2_en_mask; return gr->fbp_rop_l2_en_mask;
} }
static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g) u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
{ {
u32 tmp, max_fbps_count; u32 tmp, max_fbps_count;
tmp = gk20a_readl(g, top_num_fbps_r()); tmp = gk20a_readl(g, top_num_fbps_r());
@@ -1110,7 +1110,7 @@ static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
return max_fbps_count; return max_fbps_count;
} }
static void gr_gm20b_init_cyclestats(struct gk20a *g) void gr_gm20b_init_cyclestats(struct gk20a *g)
{ {
#if defined(CONFIG_GK20A_CYCLE_STATS) #if defined(CONFIG_GK20A_CYCLE_STATS)
g->gpu_characteristics.flags |= g->gpu_characteristics.flags |=
@@ -1122,7 +1122,7 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g)
#endif #endif
} }
static void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem) void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem)
{ {
u32 cde_v; u32 cde_v;
@@ -1131,7 +1131,7 @@ static void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem)
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v); nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v);
} }
static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
{ {
/* Check if we have at least one valid warp */ /* Check if we have at least one valid warp */
/* get paused state on maxwell */ /* get paused state on maxwell */
@@ -1210,7 +1210,7 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
} }
} }
static void gr_gm20b_get_access_map(struct gk20a *g, void gr_gm20b_get_access_map(struct gk20a *g,
u32 **whitelist, int *num_entries) u32 **whitelist, int *num_entries)
{ {
static u32 wl_addr_gm20b[] = { static u32 wl_addr_gm20b[] = {
@@ -1251,7 +1251,7 @@ static void gr_gm20b_get_access_map(struct gk20a *g,
*num_entries = ARRAY_SIZE(wl_addr_gm20b); *num_entries = ARRAY_SIZE(wl_addr_gm20b);
} }
static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
{ {
int sm_id; int sm_id;
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
@@ -1281,7 +1281,7 @@ static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
return 0; return 0;
} }
static int gm20b_gr_update_sm_error_state(struct gk20a *g, int gm20b_gr_update_sm_error_state(struct gk20a *g,
struct channel_gk20a *ch, u32 sm_id, struct channel_gk20a *ch, u32 sm_id,
struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state) struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state)
{ {
@@ -1353,7 +1353,7 @@ fail:
return err; return err;
} }
static int gm20b_gr_clear_sm_error_state(struct gk20a *g, int gm20b_gr_clear_sm_error_state(struct gk20a *g,
struct channel_gk20a *ch, u32 sm_id) struct channel_gk20a *ch, u32 sm_id)
{ {
u32 gpc, tpc, offset; u32 gpc, tpc, offset;
@@ -1394,7 +1394,7 @@ fail:
return err; return err;
} }
static int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
struct nvgpu_preemption_modes_rec *preemption_modes_rec) struct nvgpu_preemption_modes_rec *preemption_modes_rec)
{ {
preemption_modes_rec->graphics_preemption_mode_flags = preemption_modes_rec->graphics_preemption_mode_flags =
@@ -1421,7 +1421,7 @@ int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask)
return 0; return 0;
} }
static int gm20b_gr_fuse_override(struct gk20a *g) int gm20b_gr_fuse_override(struct gk20a *g)
{ {
struct device_node *np = dev_from_gk20a(g)->of_node; struct device_node *np = dev_from_gk20a(g)->of_node;
u32 *fuses; u32 *fuses;
@@ -1457,7 +1457,7 @@ static int gm20b_gr_fuse_override(struct gk20a *g)
return 0; return 0;
} }
static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
{ {
u32 ltc_shared_base = ltc_ltcs_ltss_v(); u32 ltc_shared_base = ltc_ltcs_ltss_v();
u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
@@ -1466,7 +1466,7 @@ static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
(addr < (ltc_shared_base + lts_stride)); (addr < (ltc_shared_base + lts_stride));
} }
static bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr)
{ {
u32 lts_shared_base = ltc_ltc0_ltss_v(); u32 lts_shared_base = ltc_ltc0_ltss_v();
u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
@@ -1498,7 +1498,7 @@ static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num,
*priv_addr_table_index = index; *priv_addr_table_index = index;
} }
static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
u32 *priv_addr_table, u32 *priv_addr_table,
u32 *priv_addr_table_index) u32 *priv_addr_table_index)
{ {
@@ -1518,7 +1518,7 @@ static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
priv_addr_table_index); priv_addr_table_index);
} }
static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
u32 *priv_addr_table, u32 *priv_addr_table,
u32 *priv_addr_table_index) u32 *priv_addr_table_index)
{ {
@@ -1530,7 +1530,7 @@ static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
priv_addr_table, priv_addr_table_index); priv_addr_table, priv_addr_table_index);
} }
static void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
u32 global_esr) u32 global_esr)
{ {
u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
@@ -1559,121 +1559,3 @@ void gm20a_gr_disable_rd_coalesce(struct gk20a *g)
gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg); gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg);
} }
/*
 * Legacy runtime initializer for the gm20b "gr" (graphics) HAL.
 *
 * Populates the gr sub-struct of g->ops with a mix of gm20b-specific
 * implementations (gr_gm20b_* / gm20b_gr_*) and inherited gk20a
 * fallbacks (gr_gk20a_* / gk20a_gr_*).
 *
 * NOTE(review): this function is being removed by the surrounding
 * change in favor of a chip-specific, compile-time-constant gpu_ops
 * table in hal_gm20b.c; the only runtime-conditional assignment it
 * performed (load_ctxsw_ucode, below) must be preserved by the new
 * scheme — confirm the static table path re-applies the
 * NVGPU_SEC_PRIVSECURITY check.
 */
void gm20b_init_gr(struct gk20a *g)
{
struct gpu_ops *gops = &g->ops;
gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
gops->gr.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults;
gops->gr.cb_size_default = gr_gm20b_cb_size_default;
gops->gr.calc_global_ctx_buffer_size =
gr_gm20b_calc_global_ctx_buffer_size;
gops->gr.commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb;
gops->gr.commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb;
gops->gr.commit_global_cb_manager = gr_gm20b_commit_global_cb_manager;
gops->gr.commit_global_pagepool = gr_gm20b_commit_global_pagepool;
gops->gr.handle_sw_method = gr_gm20b_handle_sw_method;
gops->gr.set_alpha_circular_buffer_size = gr_gm20b_set_alpha_circular_buffer_size;
gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size;
gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions;
gops->gr.is_valid_class = gr_gm20b_is_valid_class;
gops->gr.is_valid_gfx_class = gr_gm20b_is_valid_gfx_class;
gops->gr.is_valid_compute_class = gr_gm20b_is_valid_compute_class;
gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs;
gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs;
gops->gr.init_fs_state = gr_gm20b_init_fs_state;
gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask;
gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments;
/* Runtime choice: secure (ACR) vs. non-secure ctxsw ucode load path. */
if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY))
gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
else
gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
gops->gr.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask;
gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask;
gops->gr.free_channel_ctx = gk20a_free_channel_ctx;
gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx;
gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull;
gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
gops->gr.is_tpc_addr = gr_gm20b_is_tpc_addr;
gops->gr.get_tpc_num = gr_gm20b_get_tpc_num;
gops->gr.detect_sm_arch = gr_gm20b_detect_sm_arch;
gops->gr.add_zbc_color = gr_gk20a_add_zbc_color;
gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth;
gops->gr.zbc_set_table = gk20a_gr_zbc_set_table;
gops->gr.zbc_query_table = gr_gk20a_query_zbc;
gops->gr.pmu_save_zbc = gk20a_pmu_save_zbc;
gops->gr.add_zbc = gr_gk20a_add_zbc;
gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size;
gops->gr.init_ctx_state = gr_gk20a_init_ctx_state;
gops->gr.alloc_gr_ctx = gr_gm20b_alloc_gr_ctx;
gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx;
gops->gr.update_ctxsw_preemption_mode =
gr_gm20b_update_ctxsw_preemption_mode;
gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
gops->gr.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask;
gops->gr.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp;
gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc;
gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask;
gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count;
gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info;
gops->gr.wait_empty = gr_gk20a_wait_idle;
gops->gr.init_cyclestats = gr_gm20b_init_cyclestats;
gops->gr.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode;
gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs;
gops->gr.bpt_reg_info = gr_gm20b_bpt_reg_info;
gops->gr.get_access_map = gr_gm20b_get_access_map;
gops->gr.handle_fecs_error = gk20a_gr_handle_fecs_error;
gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception;
gops->gr.handle_tex_exception = gr_gk20a_handle_tex_exception;
gops->gr.enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions;
gops->gr.enable_exceptions = gk20a_gr_enable_exceptions;
/* gm20b has no LRF/TEX/LTC/DRAM override hook. */
gops->gr.get_lrf_tex_ltc_dram_override = NULL;
gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state;
gops->gr.update_sm_error_state = gm20b_gr_update_sm_error_state;
gops->gr.clear_sm_error_state = gm20b_gr_clear_sm_error_state;
gops->gr.suspend_contexts = gr_gk20a_suspend_contexts;
gops->gr.resume_contexts = gr_gk20a_resume_contexts;
gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags;
gops->gr.fuse_override = gm20b_gr_fuse_override;
gops->gr.init_sm_id_table = gr_gk20a_init_sm_id_table;
gops->gr.load_smid_config = gr_gm20b_load_smid_config;
gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering;
gops->gr.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr;
gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr;
gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr;
gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr;
gops->gr.setup_rop_mapping = gr_gk20a_setup_rop_mapping;
gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping;
gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice;
gops->gr.commit_inst = gr_gk20a_commit_inst;
gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr;
gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr;
gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode;
gops->gr.load_tpc_mask = gr_gm20b_load_tpc_mask;
gops->gr.inval_icache = gr_gk20a_inval_icache;
gops->gr.trigger_suspend = gr_gk20a_trigger_suspend;
gops->gr.wait_for_pause = gr_gk20a_wait_for_pause;
gops->gr.resume_from_pause = gr_gk20a_resume_from_pause;
gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors;
gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions;
gops->gr.get_esr_sm_sel = gk20a_gr_get_esr_sm_sel;
gops->gr.sm_debugger_attached = gk20a_gr_sm_debugger_attached;
gops->gr.suspend_single_sm = gk20a_gr_suspend_single_sm;
gops->gr.suspend_all_sms = gk20a_gr_suspend_all_sms;
gops->gr.resume_single_sm = gk20a_gr_resume_single_sm;
gops->gr.resume_all_sms = gk20a_gr_resume_all_sms;
gops->gr.get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr;
gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr;
gops->gr.get_sm_no_lock_down_hww_global_esr_mask =
gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask;
gops->gr.lock_down_sm = gk20a_gr_lock_down_sm;
gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down;
gops->gr.clear_sm_hww = gm20b_gr_clear_sm_hww;
gops->gr.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf;
gops->gr.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs;
/* Name uses the historical "gm20a" spelling; kept for ABI/grep parity. */
gops->gr.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce;
}

View File

@@ -34,12 +34,95 @@ enum {
#define NVB1C0_SET_RD_COALESCE 0x0228 #define NVB1C0_SET_RD_COALESCE 0x0228
#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
void gm20b_init_gr(struct gk20a *g);
void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_ctx_gk20a *ch_ctx,
u64 addr, bool patch); u64 addr, bool patch);
int gr_gm20b_init_fs_state(struct gk20a *g); int gr_gm20b_init_fs_state(struct gk20a *g);
int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask);
void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data); void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data);
void gm20a_gr_disable_rd_coalesce(struct gk20a *g);
/*
 * Prototypes for gm20b gr HAL implementations, exported so that
 * hal_gm20b.c (and later chips) can reference them directly in a
 * statically-initialized gpu_ops table instead of inheriting via a
 * runtime init function.
 */
void gr_gm20b_init_gpc_mmu(struct gk20a *g);
void gr_gm20b_bundle_cb_defaults(struct gk20a *g);
void gr_gm20b_cb_size_default(struct gk20a *g);
int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g);
void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx,
u64 addr, u64 size, bool patch);
int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
struct channel_gk20a *c, bool patch);
void gr_gm20b_commit_global_pagepool(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx,
u64 addr, u32 size, bool patch);
int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data);
void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data);
void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g);
bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num);
bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num);
bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num);
void gr_gm20b_init_sm_dsm_reg_info(void);
void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
u32 *num_sm_dsm_perf_regs,
u32 **sm_dsm_perf_regs,
u32 *perf_register_stride);
void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
u32 *num_sm_dsm_perf_ctrl_regs,
u32 **sm_dsm_perf_ctrl_regs,
u32 *ctrl_register_stride);
u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
void gr_gm20b_load_tpc_mask(struct gk20a *g);
void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
u32 gpc, u32 tpc, u32 smid);
int gr_gm20b_load_smid_config(struct gk20a *g);
int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
/*
 * One declaration covers both #ifdef-selected definitions in
 * gr_gm20b.c (real loader vs. -EPERM stub); the duplicate line that
 * mirrored the two definitions was redundant and has been dropped.
 */
int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
void gr_gm20b_detect_sm_arch(struct gk20a *g);
u32 gr_gm20b_pagepool_default_size(struct gk20a *g);
int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
u32 class,
u32 flags);
void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx,
struct nvgpu_mem *mem);
int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o);
int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
bool enable);
u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g);
u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g);
u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g);
u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g);
u32 gr_gm20b_get_max_fbps_count(struct gk20a *g);
void gr_gm20b_init_cyclestats(struct gk20a *g);
void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem);
void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state);
void gr_gm20b_get_access_map(struct gk20a *g,
u32 **whitelist, int *num_entries);
int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc);
int gm20b_gr_update_sm_error_state(struct gk20a *g,
struct channel_gk20a *ch, u32 sm_id,
struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state);
int gm20b_gr_clear_sm_error_state(struct gk20a *g,
struct channel_gk20a *ch, u32 sm_id);
int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
struct nvgpu_preemption_modes_rec *preemption_modes_rec);
int gm20b_gr_fuse_override(struct gk20a *g);
bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr);
bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr);
void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
u32 *priv_addr_table,
u32 *priv_addr_table_index);
void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
u32 *priv_addr_table,
u32 *priv_addr_table_index);
void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
u32 global_esr);
#endif

View File

@@ -27,6 +27,7 @@
#include "gk20a/priv_ring_gk20a.h" #include "gk20a/priv_ring_gk20a.h"
#include "gk20a/regops_gk20a.h" #include "gk20a/regops_gk20a.h"
#include "gk20a/pmu_gk20a.h" #include "gk20a/pmu_gk20a.h"
#include "gk20a/gr_gk20a.h"
#include "ltc_gm20b.h" #include "ltc_gm20b.h"
#include "gr_gm20b.h" #include "gr_gm20b.h"
@@ -170,6 +171,118 @@ static const struct gpu_ops gm20b_ops = {
.isr_stall = gk20a_ce2_isr, .isr_stall = gk20a_ce2_isr,
.isr_nonstall = gk20a_ce2_nonstall_isr, .isr_nonstall = gk20a_ce2_nonstall_isr,
}, },
.gr = {
.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
.cb_size_default = gr_gm20b_cb_size_default,
.calc_global_ctx_buffer_size =
gr_gm20b_calc_global_ctx_buffer_size,
.commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb,
.commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb,
.commit_global_cb_manager = gr_gm20b_commit_global_cb_manager,
.commit_global_pagepool = gr_gm20b_commit_global_pagepool,
.handle_sw_method = gr_gm20b_handle_sw_method,
.set_alpha_circular_buffer_size =
gr_gm20b_set_alpha_circular_buffer_size,
.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size,
.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions,
.is_valid_class = gr_gm20b_is_valid_class,
.is_valid_gfx_class = gr_gm20b_is_valid_gfx_class,
.is_valid_compute_class = gr_gm20b_is_valid_compute_class,
.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
.init_fs_state = gr_gm20b_init_fs_state,
.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.free_channel_ctx = gk20a_free_channel_ctx,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gm20b_detect_sm_arch,
.add_zbc_color = gr_gk20a_add_zbc_color,
.add_zbc_depth = gr_gk20a_add_zbc_depth,
.zbc_set_table = gk20a_gr_zbc_set_table,
.zbc_query_table = gr_gk20a_query_zbc,
.pmu_save_zbc = gk20a_pmu_save_zbc,
.add_zbc = gr_gk20a_add_zbc,
.pagepool_default_size = gr_gm20b_pagepool_default_size,
.init_ctx_state = gr_gk20a_init_ctx_state,
.alloc_gr_ctx = gr_gm20b_alloc_gr_ctx,
.free_gr_ctx = gr_gk20a_free_gr_ctx,
.update_ctxsw_preemption_mode =
gr_gm20b_update_ctxsw_preemption_mode,
.dump_gr_regs = gr_gm20b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.wait_empty = gr_gk20a_wait_idle,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
.bpt_reg_info = gr_gm20b_bpt_reg_info,
.get_access_map = gr_gm20b_get_access_map,
.handle_fecs_error = gk20a_gr_handle_fecs_error,
.handle_sm_exception = gr_gk20a_handle_sm_exception,
.handle_tex_exception = gr_gk20a_handle_tex_exception,
.enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions,
.enable_exceptions = gk20a_gr_enable_exceptions,
.get_lrf_tex_ltc_dram_override = NULL,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
.record_sm_error_state = gm20b_gr_record_sm_error_state,
.update_sm_error_state = gm20b_gr_update_sm_error_state,
.clear_sm_error_state = gm20b_gr_clear_sm_error_state,
.suspend_contexts = gr_gk20a_suspend_contexts,
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags,
.fuse_override = gm20b_gr_fuse_override,
.init_sm_id_table = gr_gk20a_init_sm_id_table,
.load_smid_config = gr_gm20b_load_smid_config,
.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr,
.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr,
.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr,
.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr,
.setup_rop_mapping = gr_gk20a_setup_rop_mapping,
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
.commit_global_timeslice = gr_gk20a_commit_global_timeslice,
.commit_inst = gr_gk20a_commit_inst,
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
.write_pm_ptr = gr_gk20a_write_pm_ptr,
.init_elcg_mode = gr_gk20a_init_elcg_mode,
.load_tpc_mask = gr_gm20b_load_tpc_mask,
.inval_icache = gr_gk20a_inval_icache,
.trigger_suspend = gr_gk20a_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
.resume_from_pause = gr_gk20a_resume_from_pause,
.clear_sm_errors = gr_gk20a_clear_sm_errors,
.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions,
.get_esr_sm_sel = gk20a_gr_get_esr_sm_sel,
.sm_debugger_attached = gk20a_gr_sm_debugger_attached,
.suspend_single_sm = gk20a_gr_suspend_single_sm,
.suspend_all_sms = gk20a_gr_suspend_all_sms,
.resume_single_sm = gk20a_gr_resume_single_sm,
.resume_all_sms = gk20a_gr_resume_all_sms,
.get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr,
.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr,
.get_sm_no_lock_down_hww_global_esr_mask =
gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask,
.lock_down_sm = gk20a_gr_lock_down_sm,
.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down,
.clear_sm_hww = gm20b_gr_clear_sm_hww,
.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
},
.fb = { .fb = {
.reset = fb_gk20a_reset, .reset = fb_gk20a_reset,
.init_hw = gk20a_fb_init_hw, .init_hw = gk20a_fb_init_hw,
@@ -448,6 +561,7 @@ int gm20b_init_hal(struct gk20a *g)
gops->ltc = gm20b_ops.ltc; gops->ltc = gm20b_ops.ltc;
gops->ce2 = gm20b_ops.ce2; gops->ce2 = gm20b_ops.ce2;
gops->gr = gm20b_ops.gr;
gops->fb = gm20b_ops.fb; gops->fb = gm20b_ops.fb;
gops->clock_gating = gm20b_ops.clock_gating; gops->clock_gating = gm20b_ops.clock_gating;
gops->fifo = gm20b_ops.fifo; gops->fifo = gm20b_ops.fifo;
@@ -538,6 +652,8 @@ int gm20b_init_hal(struct gk20a *g)
gops->pmu.init_wpr_region = gm20b_pmu_init_acr; gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode; gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode;
gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
} else { } else {
/* Inherit from gk20a */ /* Inherit from gk20a */
gops->pmu.is_pmu_supported = gk20a_is_pmu_supported; gops->pmu.is_pmu_supported = gk20a_is_pmu_supported;
@@ -547,14 +663,14 @@ int gm20b_init_hal(struct gk20a *g)
gops->pmu.load_lsfalcon_ucode = NULL; gops->pmu.load_lsfalcon_ucode = NULL;
gops->pmu.init_wpr_region = NULL; gops->pmu.init_wpr_region = NULL;
gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
} }
__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
g->pmu_lsf_pmu_wpr_init_done = 0; g->pmu_lsf_pmu_wpr_init_done = 0;
g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
gm20b_init_gr(g);
gm20b_init_uncompressed_kind_map(); gm20b_init_uncompressed_kind_map();
gm20b_init_kind_attr(); gm20b_init_kind_attr();

View File

@@ -24,7 +24,7 @@
#include <nvgpu/hw/gp106/hw_gr_gp106.h> #include <nvgpu/hw/gp106/hw_gr_gp106.h>
static bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num) bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num)
{ {
bool valid = false; bool valid = false;
@@ -53,7 +53,7 @@ static bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num)
return valid; return valid;
} }
static u32 gr_gp106_pagepool_default_size(struct gk20a *g) u32 gr_gp106_pagepool_default_size(struct gk20a *g)
{ {
return gr_scc_pagepool_total_pages_hwmax_value_v(); return gr_scc_pagepool_total_pages_hwmax_value_v();
} }
@@ -63,7 +63,7 @@ static void gr_gp106_set_go_idle_timeout(struct gk20a *g, u32 data)
gk20a_writel(g, gr_fe_go_idle_timeout_r(), data); gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
} }
static int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data) u32 class_num, u32 offset, u32 data)
{ {
gk20a_dbg_fn(""); gk20a_dbg_fn("");
@@ -111,7 +111,7 @@ fail:
return -EINVAL; return -EINVAL;
} }
static void gr_gp106_cb_size_default(struct gk20a *g) void gr_gp106_cb_size_default(struct gk20a *g)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
@@ -121,7 +121,7 @@ static void gr_gp106_cb_size_default(struct gk20a *g)
gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
} }
static int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct gr_ctx_desc *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
@@ -233,19 +233,3 @@ fail_free_preempt:
fail: fail:
return err; return err;
} }
/*
 * Install the gp106 gr HAL entries.
 *
 * Starts from the gp10b gr HAL (gp10b_init_gr) and then overrides the
 * handful of entries that differ on gp106; entries assigned NULL have
 * no gp106 implementation installed.
 */
void gp106_init_gr(struct gk20a *g)
{
	/* Inherit the gp10b gr HAL first, then apply gp106 overrides. */
	gp10b_init_gr(g);

	g->ops.gr.is_valid_class = gr_gp106_is_valid_class;
	g->ops.gr.pagepool_default_size = gr_gp106_pagepool_default_size;
	g->ops.gr.handle_sw_method = gr_gp106_handle_sw_method;
	g->ops.gr.cb_size_default = gr_gp106_cb_size_default;
	g->ops.gr.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode;

	/* Explicitly cleared: no gp106-specific implementation is provided. */
	g->ops.gr.init_preemption_state = NULL;
	g->ops.gr.create_gr_sysfs = NULL;
	g->ops.gr.set_boosted_ctx = NULL;
	g->ops.gr.update_boosted_ctx = NULL;
}

View File

@@ -23,6 +23,15 @@ enum {
PASCAL_COMPUTE_B = 0xC1C0, PASCAL_COMPUTE_B = 0xC1C0,
}; };
void gp106_init_gr(struct gk20a *g); bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num);
u32 gr_gp106_pagepool_default_size(struct gk20a *g);
int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data);
void gr_gp106_cb_size_default(struct gk20a *g);
int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx,
struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode,
u32 compute_preempt_mode);
#endif #endif

View File

@@ -27,6 +27,7 @@
#include "gk20a/mc_gk20a.h" #include "gk20a/mc_gk20a.h"
#include "gk20a/fb_gk20a.h" #include "gk20a/fb_gk20a.h"
#include "gk20a/pmu_gk20a.h" #include "gk20a/pmu_gk20a.h"
#include "gk20a/gr_gk20a.h"
#include "gp10b/ltc_gp10b.h" #include "gp10b/ltc_gp10b.h"
#include "gp10b/gr_gp10b.h" #include "gp10b/gr_gp10b.h"
@@ -40,6 +41,7 @@
#include "gp10b/fifo_gp10b.h" #include "gp10b/fifo_gp10b.h"
#include "gp10b/fb_gp10b.h" #include "gp10b/fb_gp10b.h"
#include "gp10b/pmu_gp10b.h" #include "gp10b/pmu_gp10b.h"
#include "gp10b/gr_gp10b.h"
#include "gp106/fifo_gp106.h" #include "gp106/fifo_gp106.h"
#include "gp106/regops_gp106.h" #include "gp106/regops_gp106.h"
@@ -51,6 +53,7 @@
#include "gm20b/pmu_gm20b.h" #include "gm20b/pmu_gm20b.h"
#include "gm20b/fb_gm20b.h" #include "gm20b/fb_gm20b.h"
#include "gm20b/acr_gm20b.h" #include "gm20b/acr_gm20b.h"
#include "gm20b/gr_gm20b.h"
#include "gp106/acr_gp106.h" #include "gp106/acr_gp106.h"
#include "gp106/sec2_gp106.h" #include "gp106/sec2_gp106.h"
@@ -221,6 +224,128 @@ static const struct gpu_ops gp106_ops = {
.isr_stall = gp10b_ce_isr, .isr_stall = gp10b_ce_isr,
.isr_nonstall = gp10b_ce_nonstall_isr, .isr_nonstall = gp10b_ce_nonstall_isr,
}, },
.gr = {
.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
.cb_size_default = gr_gp106_cb_size_default,
.calc_global_ctx_buffer_size =
gr_gp10b_calc_global_ctx_buffer_size,
.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
.commit_global_pagepool = gr_gp10b_commit_global_pagepool,
.handle_sw_method = gr_gp106_handle_sw_method,
.set_alpha_circular_buffer_size =
gr_gp10b_set_alpha_circular_buffer_size,
.set_circular_buffer_size = gr_gp10b_set_circular_buffer_size,
.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions,
.is_valid_class = gr_gp106_is_valid_class,
.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class,
.is_valid_compute_class = gr_gp10b_is_valid_compute_class,
.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
.init_fs_state = gr_gp10b_init_fs_state,
.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.free_channel_ctx = gk20a_free_channel_ctx,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gm20b_detect_sm_arch,
.add_zbc_color = gr_gp10b_add_zbc_color,
.add_zbc_depth = gr_gp10b_add_zbc_depth,
.zbc_set_table = gk20a_gr_zbc_set_table,
.zbc_query_table = gr_gk20a_query_zbc,
.pmu_save_zbc = gk20a_pmu_save_zbc,
.add_zbc = gr_gk20a_add_zbc,
.pagepool_default_size = gr_gp106_pagepool_default_size,
.init_ctx_state = gr_gp10b_init_ctx_state,
.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
.free_gr_ctx = gr_gp10b_free_gr_ctx,
.update_ctxsw_preemption_mode =
gr_gp10b_update_ctxsw_preemption_mode,
.dump_gr_regs = gr_gp10b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.wait_empty = gr_gp10b_wait_empty,
.init_cyclestats = gr_gp10b_init_cyclestats,
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
.bpt_reg_info = gr_gm20b_bpt_reg_info,
.get_access_map = gr_gp10b_get_access_map,
.handle_fecs_error = gr_gp10b_handle_fecs_error,
.handle_sm_exception = gr_gp10b_handle_sm_exception,
.handle_tex_exception = gr_gp10b_handle_tex_exception,
.enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions,
.enable_exceptions = gk20a_gr_enable_exceptions,
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
.record_sm_error_state = gm20b_gr_record_sm_error_state,
.update_sm_error_state = gm20b_gr_update_sm_error_state,
.clear_sm_error_state = gm20b_gr_clear_sm_error_state,
.suspend_contexts = gr_gp10b_suspend_contexts,
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.fuse_override = gp10b_gr_fuse_override,
.init_sm_id_table = gr_gk20a_init_sm_id_table,
.load_smid_config = gr_gp10b_load_smid_config,
.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr,
.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr,
.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr,
.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr,
.setup_rop_mapping = gr_gk20a_setup_rop_mapping,
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
.commit_global_timeslice = gr_gk20a_commit_global_timeslice,
.commit_inst = gr_gk20a_commit_inst,
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
.write_pm_ptr = gr_gk20a_write_pm_ptr,
.init_elcg_mode = gr_gk20a_init_elcg_mode,
.load_tpc_mask = gr_gm20b_load_tpc_mask,
.inval_icache = gr_gk20a_inval_icache,
.trigger_suspend = gr_gk20a_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
.resume_from_pause = gr_gk20a_resume_from_pause,
.clear_sm_errors = gr_gk20a_clear_sm_errors,
.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions,
.get_esr_sm_sel = gk20a_gr_get_esr_sm_sel,
.sm_debugger_attached = gk20a_gr_sm_debugger_attached,
.suspend_single_sm = gk20a_gr_suspend_single_sm,
.suspend_all_sms = gk20a_gr_suspend_all_sms,
.resume_single_sm = gk20a_gr_resume_single_sm,
.resume_all_sms = gk20a_gr_resume_all_sms,
.get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr,
.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr,
.get_sm_no_lock_down_hww_global_esr_mask =
gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask,
.lock_down_sm = gk20a_gr_lock_down_sm,
.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down,
.clear_sm_hww = gm20b_gr_clear_sm_hww,
.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
.set_boosted_ctx = NULL,
.set_preemption_mode = gr_gp10b_set_preemption_mode,
.set_czf_bypass = gr_gp10b_set_czf_bypass,
.pre_process_sm_exception = gr_gp10b_pre_process_sm_exception,
.set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va,
.init_preemption_state = NULL,
.update_boosted_ctx = NULL,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.create_gr_sysfs = NULL,
.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode,
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode
},
.fb = { .fb = {
.reset = gp106_fb_reset, .reset = gp106_fb_reset,
.init_hw = gk20a_fb_init_hw, .init_hw = gk20a_fb_init_hw,
@@ -569,6 +694,7 @@ int gp106_init_hal(struct gk20a *g)
gops->ltc = gp106_ops.ltc; gops->ltc = gp106_ops.ltc;
gops->ce2 = gp106_ops.ce2; gops->ce2 = gp106_ops.ce2;
gops->gr = gp106_ops.gr;
gops->fb = gp106_ops.fb; gops->fb = gp106_ops.fb;
gops->clock_gating = gp106_ops.clock_gating; gops->clock_gating = gp106_ops.clock_gating;
gops->fifo = gp106_ops.fifo; gops->fifo = gp106_ops.fifo;
@@ -618,7 +744,6 @@ int gp106_init_hal(struct gk20a *g)
g->pmu_lsf_pmu_wpr_init_done = 0; g->pmu_lsf_pmu_wpr_init_done = 0;
g->bootstrap_owner = LSF_FALCON_ID_SEC2; g->bootstrap_owner = LSF_FALCON_ID_SEC2;
gp106_init_gr(g);
gp10b_init_uncompressed_kind_map(); gp10b_init_uncompressed_kind_map();
gp10b_init_kind_attr(); gp10b_init_kind_attr();

View File

@@ -41,7 +41,7 @@
#define NVGPU_GFXP_WFI_TIMEOUT_US 100LL #define NVGPU_GFXP_WFI_TIMEOUT_US 100LL
static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
{ {
bool valid = false; bool valid = false;
@@ -67,7 +67,7 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
return valid; return valid;
} }
static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num) bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
{ {
if (class_num == PASCAL_A || class_num == MAXWELL_B) if (class_num == PASCAL_A || class_num == MAXWELL_B)
return true; return true;
@@ -75,7 +75,7 @@ static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
return false; return false;
} }
static bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num) bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num)
{ {
if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B) if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B)
return true; return true;
@@ -119,7 +119,7 @@ static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err,
*count_to_adjust = 0; *count_to_adjust = 0;
} }
static int gr_gp10b_handle_sm_exception(struct gk20a *g, int gr_gp10b_handle_sm_exception(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm, u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch, bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr) u32 *hww_global_esr)
@@ -244,7 +244,7 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g,
return ret; return ret;
} }
static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
bool *post_event) bool *post_event)
{ {
int ret = 0; int ret = 0;
@@ -380,7 +380,7 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
return ret; return ret;
} }
static int gr_gp10b_commit_global_cb_manager(struct gk20a *g, int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
struct channel_gk20a *c, bool patch) struct channel_gk20a *c, bool patch)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
@@ -481,7 +481,7 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
return 0; return 0;
} }
static void gr_gp10b_commit_global_pagepool(struct gk20a *g, void gr_gp10b_commit_global_pagepool(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_ctx_gk20a *ch_ctx,
u64 addr, u32 size, bool patch) u64 addr, u32 size, bool patch)
{ {
@@ -499,7 +499,7 @@ static void gr_gp10b_commit_global_pagepool(struct gk20a *g,
gr_gpcs_gcc_pagepool_total_pages_f(size), patch); gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
} }
static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *color_val, u32 index) struct zbc_entry *color_val, u32 index)
{ {
u32 i; u32 i;
@@ -554,7 +554,7 @@ static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
return 0; return 0;
} }
static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *depth_val, u32 index) struct zbc_entry *depth_val, u32 index)
{ {
u32 zbc_z; u32 zbc_z;
@@ -592,12 +592,12 @@ static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
return 0; return 0;
} }
static u32 gr_gp10b_pagepool_default_size(struct gk20a *g) u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
{ {
return gr_scc_pagepool_total_pages_hwmax_value_v(); return gr_scc_pagepool_total_pages_hwmax_value_v();
} }
static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g) int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
int size; int size;
@@ -642,7 +642,7 @@ static void gr_gp10b_set_coalesce_buffer_size(struct gk20a *g, u32 data)
gk20a_dbg_fn("done"); gk20a_dbg_fn("done");
} }
static void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data) void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
{ {
u32 val; u32 val;
@@ -667,7 +667,7 @@ static void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
gk20a_writel(g, gr_bes_crop_debug3_r(), val); gk20a_writel(g, gr_bes_crop_debug3_r(), val);
} }
static int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data) u32 class_num, u32 offset, u32 data)
{ {
gk20a_dbg_fn(""); gk20a_dbg_fn("");
@@ -718,7 +718,7 @@ fail:
return -EINVAL; return -EINVAL;
} }
static void gr_gp10b_cb_size_default(struct gk20a *g) void gr_gp10b_cb_size_default(struct gk20a *g)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
@@ -728,7 +728,7 @@ static void gr_gp10b_cb_size_default(struct gk20a *g)
gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
} }
static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
u32 gpc_index, ppc_index, stride, val; u32 gpc_index, ppc_index, stride, val;
@@ -776,7 +776,7 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
} }
} }
static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
u32 gpc_index, ppc_index, stride, val; u32 gpc_index, ppc_index, stride, val;
@@ -843,7 +843,7 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
} }
} }
static int gr_gp10b_init_ctx_state(struct gk20a *g) int gr_gp10b_init_ctx_state(struct gk20a *g)
{ {
struct fecs_method_op_gk20a op = { struct fecs_method_op_gk20a op = {
.mailbox = { .id = 0, .data = 0, .mailbox = { .id = 0, .data = 0,
@@ -910,7 +910,7 @@ fail_free:
return err; return err;
} }
static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct gr_ctx_desc *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
@@ -1034,7 +1034,7 @@ fail:
return err; return err;
} }
static int gr_gp10b_alloc_gr_ctx(struct gk20a *g, int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags) u32 flags)
@@ -1131,7 +1131,7 @@ static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
nvgpu_mem_end(g, mem); nvgpu_mem_end(g, mem);
} }
static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
struct gr_ctx_desc *gr_ctx) struct gr_ctx_desc *gr_ctx)
{ {
gk20a_dbg_fn(""); gk20a_dbg_fn("");
@@ -1151,7 +1151,7 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
} }
static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_ctx_gk20a *ch_ctx,
struct nvgpu_mem *mem) struct nvgpu_mem *mem)
{ {
@@ -1256,7 +1256,7 @@ out:
gk20a_dbg_fn("done"); gk20a_dbg_fn("done");
} }
static int gr_gp10b_dump_gr_status_regs(struct gk20a *g, int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o) struct gk20a_debug_output *o)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
@@ -1402,7 +1402,7 @@ static bool gr_activity_empty_or_preempted(u32 val)
return true; return true;
} }
static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
u32 expect_delay) u32 expect_delay)
{ {
u32 delay = expect_delay; u32 delay = expect_delay;
@@ -1453,7 +1453,7 @@ static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
return -EAGAIN; return -EAGAIN;
} }
static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_ctx_gk20a *ch_ctx,
u64 addr, bool patch) u64 addr, bool patch)
{ {
@@ -1481,7 +1481,7 @@ static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
} }
static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_ctx_gk20a *ch_ctx,
u64 addr, u64 size, bool patch) u64 addr, u64 size, bool patch)
{ {
@@ -1516,7 +1516,7 @@ static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
gr_pd_ab_dist_cfg2_state_limit_f(data), patch); gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
} }
static int gr_gp10b_load_smid_config(struct gk20a *g) int gr_gp10b_load_smid_config(struct gk20a *g)
{ {
u32 *tpc_sm_id; u32 *tpc_sm_id;
u32 i, j; u32 i, j;
@@ -1586,7 +1586,7 @@ int gr_gp10b_init_fs_state(struct gk20a *g)
return gr_gm20b_init_fs_state(g); return gr_gm20b_init_fs_state(g);
} }
static void gr_gp10b_init_cyclestats(struct gk20a *g) void gr_gp10b_init_cyclestats(struct gk20a *g)
{ {
#if defined(CONFIG_GK20A_CYCLE_STATS) #if defined(CONFIG_GK20A_CYCLE_STATS)
g->gpu_characteristics.flags |= g->gpu_characteristics.flags |=
@@ -1598,7 +1598,7 @@ static void gr_gp10b_init_cyclestats(struct gk20a *g)
#endif #endif
} }
static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
{ {
nvgpu_tegra_fuse_write_bypass(g, 0x1); nvgpu_tegra_fuse_write_bypass(g, 0x1);
nvgpu_tegra_fuse_write_access_sw(g, 0x0); nvgpu_tegra_fuse_write_access_sw(g, 0x0);
@@ -1611,7 +1611,7 @@ static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
} }
static void gr_gp10b_get_access_map(struct gk20a *g, void gr_gp10b_get_access_map(struct gk20a *g,
u32 **whitelist, int *num_entries) u32 **whitelist, int *num_entries)
{ {
static u32 wl_addr_gp10b[] = { static u32 wl_addr_gp10b[] = {
@@ -1801,7 +1801,7 @@ static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
* *
* On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
*/ */
static int gr_gp10b_pre_process_sm_exception(struct gk20a *g, int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr, u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr,
bool sm_debugger_attached, struct channel_gk20a *fault_ch, bool sm_debugger_attached, struct channel_gk20a *fault_ch,
bool *early_exit, bool *ignore_debugger) bool *early_exit, bool *ignore_debugger)
@@ -1988,7 +1988,7 @@ clean_up:
return gk20a_gr_handle_fecs_error(g, __ch, isr_data); return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
} }
static u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm) u32 gpc, u32 tpc, u32 sm)
{ {
u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
@@ -2003,7 +2003,7 @@ static u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
return hww_warp_esr; return hww_warp_esr;
} }
static u32 get_ecc_override_val(struct gk20a *g) u32 get_ecc_override_val(struct gk20a *g)
{ {
u32 val; u32 val;
@@ -2046,7 +2046,7 @@ static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
return ctx_resident; return ctx_resident;
} }
static int gr_gp10b_suspend_contexts(struct gk20a *g, int gr_gp10b_suspend_contexts(struct gk20a *g,
struct dbg_session_gk20a *dbg_s, struct dbg_session_gk20a *dbg_s,
int *ctx_resident_ch_fd) int *ctx_resident_ch_fd)
{ {
@@ -2122,7 +2122,7 @@ clean_up:
return err; return err;
} }
static int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
bool boost) bool boost)
{ {
struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
@@ -2156,7 +2156,7 @@ unmap_ctx:
return err; return err;
} }
static void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
struct gr_ctx_desc *gr_ctx) { struct gr_ctx_desc *gr_ctx) {
u32 v; u32 v;
@@ -2165,7 +2165,7 @@ static void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v); nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v);
} }
static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode) u32 compute_preempt_mode)
{ {
@@ -2261,7 +2261,7 @@ unamp_ctx_header:
return err; return err;
} }
static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g, int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
struct nvgpu_preemption_modes_rec *preemption_modes_rec) struct nvgpu_preemption_modes_rec *preemption_modes_rec)
{ {
preemption_modes_rec->graphics_preemption_mode_flags = ( preemption_modes_rec->graphics_preemption_mode_flags = (
@@ -2279,7 +2279,7 @@ static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
return 0; return 0;
} }
static int gp10b_gr_fuse_override(struct gk20a *g) int gp10b_gr_fuse_override(struct gk20a *g)
{ {
struct device_node *np = dev_from_gk20a(g)->of_node; struct device_node *np = dev_from_gk20a(g)->of_node;
u32 *fuses; u32 *fuses;
@@ -2319,7 +2319,7 @@ static int gp10b_gr_fuse_override(struct gk20a *g)
return 0; return 0;
} }
static int gr_gp10b_init_preemption_state(struct gk20a *g) int gr_gp10b_init_preemption_state(struct gk20a *g)
{ {
u32 debug_2; u32 debug_2;
u64 sysclk_rate; u64 sysclk_rate;
@@ -2341,7 +2341,7 @@ static int gr_gp10b_init_preemption_state(struct gk20a *g)
return 0; return 0;
} }
static void gr_gp10b_set_preemption_buffer_va(struct gk20a *g, void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va) struct nvgpu_mem *mem, u64 gpu_va)
{ {
u32 va = u64_lo32(gpu_va >> 8); u32 va = u64_lo32(gpu_va >> 8);
@@ -2367,59 +2367,3 @@ int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)
return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false); return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
} }
void gp10b_init_gr(struct gk20a *g)
{
struct gpu_ops *gops = &g->ops;
gm20b_init_gr(g);
gops->gr.init_fs_state = gr_gp10b_init_fs_state;
gops->gr.init_preemption_state = gr_gp10b_init_preemption_state;
gops->gr.is_valid_class = gr_gp10b_is_valid_class;
gops->gr.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class;
gops->gr.is_valid_compute_class = gr_gp10b_is_valid_compute_class;
gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager;
gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
gops->gr.set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va;
gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;
gops->gr.calc_global_ctx_buffer_size =
gr_gp10b_calc_global_ctx_buffer_size;
gops->gr.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb;
gops->gr.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb;
gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
gops->gr.cb_size_default = gr_gp10b_cb_size_default;
gops->gr.set_alpha_circular_buffer_size =
gr_gp10b_set_alpha_circular_buffer_size;
gops->gr.set_circular_buffer_size =
gr_gp10b_set_circular_buffer_size;
gops->gr.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3;
gops->gr.init_ctx_state = gr_gp10b_init_ctx_state;
gops->gr.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx;
gops->gr.free_gr_ctx = gr_gp10b_free_gr_ctx;
gops->gr.update_ctxsw_preemption_mode =
gr_gp10b_update_ctxsw_preemption_mode;
gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs;
gops->gr.wait_empty = gr_gp10b_wait_empty;
gops->gr.init_cyclestats = gr_gp10b_init_cyclestats;
gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask;
gops->gr.get_access_map = gr_gp10b_get_access_map;
gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
gops->gr.pre_process_sm_exception =
gr_gp10b_pre_process_sm_exception;
gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs;
gops->gr.get_lrf_tex_ltc_dram_override = get_ecc_override_val;
gops->gr.suspend_contexts = gr_gp10b_suspend_contexts;
gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode;
gops->gr.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode;
gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
gops->gr.fuse_override = gp10b_gr_fuse_override;
gops->gr.load_smid_config = gr_gp10b_load_smid_config;
gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx;
gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx;
gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass;
gops->gr.get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr;
}

View File

@@ -20,6 +20,10 @@
struct gk20a; struct gk20a;
struct gr_gk20a_isr_data; struct gr_gk20a_isr_data;
struct channel_ctx_gk20a;
struct zbc_entry;
struct gr_ctx_desc;
struct nvgpu_preemption_modes_rec;
enum { enum {
PASCAL_CHANNEL_GPFIFO_A = 0xC06F, PASCAL_CHANNEL_GPFIFO_A = 0xC06F,
@@ -39,7 +43,6 @@ enum {
#define NVC0C0_SET_SHADER_EXCEPTIONS 0x1528 #define NVC0C0_SET_SHADER_EXCEPTIONS 0x1528
#define NVC0C0_SET_RD_COALESCE 0x0228 #define NVC0C0_SET_RD_COALESCE 0x0228
void gp10b_init_gr(struct gk20a *g);
int gr_gp10b_init_fs_state(struct gk20a *g); int gr_gp10b_init_fs_state(struct gk20a *g);
int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size, int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
struct nvgpu_mem *mem); struct nvgpu_mem *mem);
@@ -50,6 +53,87 @@ int gr_gp10b_handle_fecs_error(struct gk20a *g,
int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
struct channel_gk20a *fault_ch); struct channel_gk20a *fault_ch);
bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num);
bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num);
bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num);
int gr_gp10b_handle_sm_exception(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm,
bool *post_event, struct channel_gk20a *fault_ch,
u32 *hww_global_esr);
int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
bool *post_event);
int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
struct channel_gk20a *c, bool patch);
void gr_gp10b_commit_global_pagepool(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx,
u64 addr, u32 size, bool patch);
int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *color_val, u32 index);
int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *depth_val, u32 index);
u32 gr_gp10b_pagepool_default_size(struct gk20a *g);
int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g);
void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data);
int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data);
void gr_gp10b_cb_size_default(struct gk20a *g);
void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data);
int gr_gp10b_init_ctx_state(struct gk20a *g);
int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx,
struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode,
u32 compute_preempt_mode);
int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
u32 class,
u32 flags);
void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
struct gr_ctx_desc *gr_ctx);
void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx,
struct nvgpu_mem *mem);
int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o);
int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
u32 expect_delay);
void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx,
u64 addr, bool patch);
void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx,
u64 addr, u64 size, bool patch);
int gr_gp10b_load_smid_config(struct gk20a *g);
void gr_gp10b_init_cyclestats(struct gk20a *g);
void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
void gr_gp10b_get_access_map(struct gk20a *g,
u32 **whitelist, int *num_entries);
int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr,
bool sm_debugger_attached, struct channel_gk20a *fault_ch,
bool *early_exit, bool *ignore_debugger);
u32 gp10b_gr_get_sm_hww_warp_esr(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm);
u32 get_ecc_override_val(struct gk20a *g);
int gr_gp10b_suspend_contexts(struct gk20a *g,
struct dbg_session_gk20a *dbg_s,
int *ctx_resident_ch_fd);
int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
bool boost);
void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
struct gr_ctx_desc *gr_ctx);
int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
u32 graphics_preempt_mode,
u32 compute_preempt_mode);
int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
struct nvgpu_preemption_modes_rec *preemption_modes_rec);
int gp10b_gr_fuse_override(struct gk20a *g);
int gr_gp10b_init_preemption_state(struct gk20a *g);
void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va);
int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch);
struct gr_t18x { struct gr_t18x {
struct { struct {
u32 preempt_image_size; u32 preempt_image_size;

View File

@@ -27,6 +27,7 @@
#include "gk20a/mc_gk20a.h" #include "gk20a/mc_gk20a.h"
#include "gk20a/fb_gk20a.h" #include "gk20a/fb_gk20a.h"
#include "gk20a/pmu_gk20a.h" #include "gk20a/pmu_gk20a.h"
#include "gk20a/gr_gk20a.h"
#include "gp10b/gr_gp10b.h" #include "gp10b/gr_gp10b.h"
#include "gp10b/fecs_trace_gp10b.h" #include "gp10b/fecs_trace_gp10b.h"
@@ -179,6 +180,128 @@ static const struct gpu_ops gp10b_ops = {
.isr_stall = gp10b_ce_isr, .isr_stall = gp10b_ce_isr,
.isr_nonstall = gp10b_ce_nonstall_isr, .isr_nonstall = gp10b_ce_nonstall_isr,
}, },
.gr = {
.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
.cb_size_default = gr_gp10b_cb_size_default,
.calc_global_ctx_buffer_size =
gr_gp10b_calc_global_ctx_buffer_size,
.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
.commit_global_pagepool = gr_gp10b_commit_global_pagepool,
.handle_sw_method = gr_gp10b_handle_sw_method,
.set_alpha_circular_buffer_size =
gr_gp10b_set_alpha_circular_buffer_size,
.set_circular_buffer_size = gr_gp10b_set_circular_buffer_size,
.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions,
.is_valid_class = gr_gp10b_is_valid_class,
.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class,
.is_valid_compute_class = gr_gp10b_is_valid_compute_class,
.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
.init_fs_state = gr_gp10b_init_fs_state,
.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.free_channel_ctx = gk20a_free_channel_ctx,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gm20b_detect_sm_arch,
.add_zbc_color = gr_gp10b_add_zbc_color,
.add_zbc_depth = gr_gp10b_add_zbc_depth,
.zbc_set_table = gk20a_gr_zbc_set_table,
.zbc_query_table = gr_gk20a_query_zbc,
.pmu_save_zbc = gk20a_pmu_save_zbc,
.add_zbc = gr_gk20a_add_zbc,
.pagepool_default_size = gr_gp10b_pagepool_default_size,
.init_ctx_state = gr_gp10b_init_ctx_state,
.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
.free_gr_ctx = gr_gp10b_free_gr_ctx,
.update_ctxsw_preemption_mode =
gr_gp10b_update_ctxsw_preemption_mode,
.dump_gr_regs = gr_gp10b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.wait_empty = gr_gp10b_wait_empty,
.init_cyclestats = gr_gp10b_init_cyclestats,
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
.bpt_reg_info = gr_gm20b_bpt_reg_info,
.get_access_map = gr_gp10b_get_access_map,
.handle_fecs_error = gr_gp10b_handle_fecs_error,
.handle_sm_exception = gr_gp10b_handle_sm_exception,
.handle_tex_exception = gr_gp10b_handle_tex_exception,
.enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions,
.enable_exceptions = gk20a_gr_enable_exceptions,
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
.record_sm_error_state = gm20b_gr_record_sm_error_state,
.update_sm_error_state = gm20b_gr_update_sm_error_state,
.clear_sm_error_state = gm20b_gr_clear_sm_error_state,
.suspend_contexts = gr_gp10b_suspend_contexts,
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.fuse_override = gp10b_gr_fuse_override,
.init_sm_id_table = gr_gk20a_init_sm_id_table,
.load_smid_config = gr_gp10b_load_smid_config,
.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr,
.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr,
.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr,
.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr,
.setup_rop_mapping = gr_gk20a_setup_rop_mapping,
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
.commit_global_timeslice = gr_gk20a_commit_global_timeslice,
.commit_inst = gr_gk20a_commit_inst,
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
.write_pm_ptr = gr_gk20a_write_pm_ptr,
.init_elcg_mode = gr_gk20a_init_elcg_mode,
.load_tpc_mask = gr_gm20b_load_tpc_mask,
.inval_icache = gr_gk20a_inval_icache,
.trigger_suspend = gr_gk20a_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
.resume_from_pause = gr_gk20a_resume_from_pause,
.clear_sm_errors = gr_gk20a_clear_sm_errors,
.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions,
.get_esr_sm_sel = gk20a_gr_get_esr_sm_sel,
.sm_debugger_attached = gk20a_gr_sm_debugger_attached,
.suspend_single_sm = gk20a_gr_suspend_single_sm,
.suspend_all_sms = gk20a_gr_suspend_all_sms,
.resume_single_sm = gk20a_gr_resume_single_sm,
.resume_all_sms = gk20a_gr_resume_all_sms,
.get_sm_hww_warp_esr = gp10b_gr_get_sm_hww_warp_esr,
.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr,
.get_sm_no_lock_down_hww_global_esr_mask =
gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask,
.lock_down_sm = gk20a_gr_lock_down_sm,
.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down,
.clear_sm_hww = gm20b_gr_clear_sm_hww,
.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
.set_boosted_ctx = gr_gp10b_set_boosted_ctx,
.set_preemption_mode = gr_gp10b_set_preemption_mode,
.set_czf_bypass = gr_gp10b_set_czf_bypass,
.pre_process_sm_exception = gr_gp10b_pre_process_sm_exception,
.set_preemption_buffer_va = gr_gp10b_set_preemption_buffer_va,
.init_preemption_state = gr_gp10b_init_preemption_state,
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.create_gr_sysfs = gr_gp10b_create_sysfs,
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
},
.fb = { .fb = {
.reset = fb_gk20a_reset, .reset = fb_gk20a_reset,
.init_hw = gk20a_fb_init_hw, .init_hw = gk20a_fb_init_hw,
@@ -474,6 +597,7 @@ int gp10b_init_hal(struct gk20a *g)
gops->ltc = gp10b_ops.ltc; gops->ltc = gp10b_ops.ltc;
gops->ce2 = gp10b_ops.ce2; gops->ce2 = gp10b_ops.ce2;
gops->gr = gp10b_ops.gr;
gops->fb = gp10b_ops.fb; gops->fb = gp10b_ops.fb;
gops->clock_gating = gp10b_ops.clock_gating; gops->clock_gating = gp10b_ops.clock_gating;
gops->fifo = gp10b_ops.fifo; gops->fifo = gp10b_ops.fifo;
@@ -564,6 +688,8 @@ int gp10b_init_hal(struct gk20a *g)
gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode; gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode;
gops->pmu.is_lazy_bootstrap = gp10b_is_lazy_bootstrap; gops->pmu.is_lazy_bootstrap = gp10b_is_lazy_bootstrap;
gops->pmu.is_priv_load = gp10b_is_priv_load; gops->pmu.is_priv_load = gp10b_is_priv_load;
gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
} else { } else {
/* Inherit from gk20a */ /* Inherit from gk20a */
gops->pmu.is_pmu_supported = gk20a_is_pmu_supported, gops->pmu.is_pmu_supported = gk20a_is_pmu_supported,
@@ -574,12 +700,13 @@ int gp10b_init_hal(struct gk20a *g)
gops->pmu.load_lsfalcon_ucode = NULL; gops->pmu.load_lsfalcon_ucode = NULL;
gops->pmu.init_wpr_region = NULL; gops->pmu.init_wpr_region = NULL;
gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1; gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1;
gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
} }
__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
g->pmu_lsf_pmu_wpr_init_done = 0; g->pmu_lsf_pmu_wpr_init_done = 0;
g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
gp10b_init_gr(g);
gp10b_init_uncompressed_kind_map(); gp10b_init_uncompressed_kind_map();
gp10b_init_kind_attr(); gp10b_init_kind_attr();