diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 538c8eabc..bb5d6c493 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -77,6 +77,7 @@ nvgpu-y += \ common/gr/global_ctx.o \ common/gr/ctx.o \ common/gr/subctx.o \ + common/gr/zcull.o \ common/gr/config/gr_config.o \ common/gr/config/gr_config_gm20b.o \ common/gr/fecs_trace/fecs_trace.o \ @@ -166,6 +167,8 @@ nvgpu-y += \ hal/gr/fecs_trace/fecs_trace_gm20b.o \ hal/gr/fecs_trace/fecs_trace_gp10b.o \ hal/gr/fecs_trace/fecs_trace_gv11b.o \ + hal/gr/zcull/zcull_gm20b.o \ + hal/gr/zcull/zcull_gv11b.o \ hal/gr/init/gr_init_gm20b.o \ hal/gr/init/gr_init_gp10b.o \ hal/gr/init/gr_init_gv100.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index d5441a340..29116b51b 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -113,6 +113,7 @@ srcs += common/sim.c \ common/gr/global_ctx.c \ common/gr/subctx.c \ common/gr/ctx.c \ + common/gr/zcull.c \ common/gr/config/gr_config.c \ common/gr/config/gr_config_gm20b.c \ common/gr/fecs_trace/fecs_trace.c \ @@ -320,6 +321,8 @@ srcs += common/sim.c \ hal/gr/fecs_trace/fecs_trace_gm20b.c \ hal/gr/fecs_trace/fecs_trace_gp10b.c \ hal/gr/fecs_trace/fecs_trace_gv11b.c \ + hal/gr/zcull/zcull_gm20b.c \ + hal/gr/zcull/zcull_gv11b.c \ hal/gr/init/gr_init_gm20b.c \ hal/gr/init/gr_init_gp10b.c \ hal/gr/init/gr_init_gv100.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index b7bc16b81..569340540 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -44,6 +44,8 @@ #include #include #include +#include +#include #include #include #include @@ -213,6 +215,23 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) gk20a_channel_update(ch); } +int nvgpu_channel_gr_zcull_setup(struct gk20a *g, struct channel_gk20a *c, + struct nvgpu_gr_ctx *gr_ctx) +{ + int ret = 0; + /* With a subcontext: call nvgpu_gr_ctx_zcull_setup() with its last argument false, then (only if that returned 0) nvgpu_gr_subctx_zcull_setup(); without one: call nvgpu_gr_ctx_zcull_setup() with true. Returns that call's result. This helper does not disable or preempt the channel/TSG itself. NOTE(review): the meaning of the bool argument is not visible here - confirm against nvgpu_gr_ctx_zcull_setup(). */ + if 
(c->subctx != NULL) { + ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, false); + if (ret == 0) { + nvgpu_gr_subctx_zcull_setup(g, c->subctx, gr_ctx); + } + } else { + ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, true); + } + + return ret; +} + void gk20a_channel_set_unserviceable(struct channel_gk20a *ch) { nvgpu_spinlock_acquire(&ch->unserviceable_lock); diff --git a/drivers/gpu/nvgpu/common/gr/zcull.c b/drivers/gpu/nvgpu/common/gr/zcull.c new file mode 100644 index 000000000..df75272e8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/zcull.c @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +/* Allocate a zeroed nvgpu_gr_zcull, fill its aliquot and alignment fields from g->gr.config, and store it in *gr_zcull. Returns 0 on success; on allocation failure returns -ENOMEM and stores NULL in *gr_zcull. */ +int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull) +{ + struct nvgpu_gr_config *gr_config = g->gr.config; + struct nvgpu_gr_zcull *zcull; + int err = 0; + + zcull = nvgpu_kzalloc(g, sizeof(*zcull)); + if (zcull == NULL) { + err = -ENOMEM; + goto exit; + } + + zcull->g = g; + + zcull->aliquot_width = nvgpu_gr_config_get_tpc_count(gr_config) * 16U; + zcull->aliquot_height = 16; + + zcull->width_align_pixels = + nvgpu_gr_config_get_tpc_count(gr_config) * 16U; + zcull->height_align_pixels = 32; + + zcull->aliquot_size = + zcull->aliquot_width * zcull->aliquot_height; + + /* assume no floor sweeping since we only have 1 tpc in 1 gpc */ + zcull->pixel_squares_by_aliquots = + nvgpu_gr_config_get_zcb_count(gr_config) * 16U * 16U * + nvgpu_gr_config_get_tpc_count(gr_config) / + (nvgpu_gr_config_get_gpc_count(gr_config) * + nvgpu_gr_config_get_gpc_tpc_count(gr_config, 0U)); + +exit: + *gr_zcull = zcull; + return err; +} +/* Free gr_zcull with nvgpu_kfree(); the caller's pointer is not cleared. */ +void nvgpu_gr_zcull_deinit(struct gk20a *g, struct nvgpu_gr_zcull *gr_zcull) +{ + nvgpu_kfree(g, gr_zcull); +} +/* Return gr_zcull->zcull_ctxsw_image_size; g is unused and gr_zcull is dereferenced without a NULL check. */ +u32 nvgpu_gr_get_ctxsw_zcull_size(struct gk20a *g, + struct nvgpu_gr_zcull *gr_zcull) +{ + /* assuming zcull has already been initialized */ + return gr_zcull->zcull_ctxsw_image_size; +} +/* Build the zcull tile map from gr_config, hand it to the optional program_zcull_mapping HAL op, then run the optional init_zcull_hw HAL op. Returns -1 if gr_config->map_tiles is NULL, -ENOMEM on allocation failure, the init_zcull_hw error code if that op fails, otherwise 0. */ +int nvgpu_gr_zcull_init_hw(struct gk20a *g, + struct nvgpu_gr_zcull *gr_zcull, + struct nvgpu_gr_config *gr_config) +{ + u32 *zcull_map_tiles, *zcull_bank_counters; + u32 map_counter; + u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, + GPU_LIT_NUM_TPC_PER_GPC); + u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc; + u32 map_tile_count; + int ret = 0; + + if (gr_config->map_tiles == NULL) { + return -1; + } + + if (zcull_alloc_num % 8U != 0U) { + /* Total 8 fields per map reg i.e. 
tile_0 to tile_7: round up to the next multiple of 8 (adding the bare remainder only works when it is 4) */ + zcull_alloc_num += 8U - (zcull_alloc_num % 8U); + } + zcull_map_tiles = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32)); + + if (zcull_map_tiles == NULL) { + nvgpu_err(g, + "failed to allocate zcull map titles"); + return -ENOMEM; + } + + zcull_bank_counters = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32)); + + if (zcull_bank_counters == NULL) { + nvgpu_err(g, + "failed to allocate zcull bank counters"); + nvgpu_kfree(g, zcull_map_tiles); + return -ENOMEM; + } + /* Each TPC entry receives the number of earlier TPCs that share its map tile value; padding entries stay zero from nvgpu_kzalloc(). */ + for (map_counter = 0; + map_counter < nvgpu_gr_config_get_tpc_count(gr_config); + map_counter++) { + map_tile_count = + nvgpu_gr_config_get_map_tile_count(gr_config, + map_counter); + zcull_map_tiles[map_counter] = + zcull_bank_counters[map_tile_count]; + zcull_bank_counters[map_tile_count]++; + } + + if (g->ops.gr.zcull.program_zcull_mapping != NULL) { + g->ops.gr.zcull.program_zcull_mapping(g, zcull_alloc_num, + zcull_map_tiles); + } + + nvgpu_kfree(g, zcull_map_tiles); + nvgpu_kfree(g, zcull_bank_counters); + + if (g->ops.gr.zcull.init_zcull_hw != NULL) { + ret = g->ops.gr.zcull.init_zcull_hw(g, gr_zcull, gr_config); + if (ret != 0) { + nvgpu_err(g, "failed to init zcull hw. 
err:%d", ret); + return ret; + } + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index 524f99949..f2ef5a73c 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -131,8 +131,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .load_ctxsw_ucode = NULL, .set_gpc_tpc_mask = NULL, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, - .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, - .get_zcull_info = vgpu_gr_get_zcull_info, .is_tpc_addr = gr_gm20b_is_tpc_addr, .get_tpc_num = gr_gm20b_get_tpc_num, .detect_sm_arch = vgpu_gr_detect_sm_arch, @@ -163,7 +161,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .resume_contexts = vgpu_gr_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, .init_sm_id_table = vgpu_gr_init_sm_id_table, - .program_zcull_mapping = NULL, .commit_inst = vgpu_gr_commit_inst, .trigger_suspend = NULL, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -294,6 +291,11 @@ static const struct gpu_ops vgpu_gp10b_ops = { .get_gpcs_swdx_dss_zbc_c_format_reg = NULL, .get_gpcs_swdx_dss_zbc_z_format_reg = NULL, }, + .zcull = { + .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, + .get_zcull_info = vgpu_gr_get_zcull_info, + .program_zcull_mapping = NULL, + }, #ifdef CONFIG_GK20A_CTXSW_TRACE .fecs_trace = { .alloc_user_buffer = vgpu_alloc_user_buffer, diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index 116c57591..e31397edb 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -121,14 +122,17 @@ int vgpu_gr_init_ctx_state(struct gk20a *g) nvgpu_log_fn(g, " "); g->gr.ctx_vars.golden_image_size = priv->constants.golden_ctx_size; - g->gr.ctx_vars.zcull_ctxsw_image_size = priv->constants.zcull_ctx_size; 
g->gr.ctx_vars.pm_ctxsw_image_size = priv->constants.hwpm_ctx_size; if (!g->gr.ctx_vars.golden_image_size || - !g->gr.ctx_vars.zcull_ctxsw_image_size || !g->gr.ctx_vars.pm_ctxsw_image_size) { return -ENXIO; } + g->gr.zcull->zcull_ctxsw_image_size = priv->constants.zcull_ctx_size; + if (g->gr.zcull->zcull_ctxsw_image_size == 0U) { + return -ENXIO; + } + g->gr.ctx_vars.priv_access_map_size = 512 * 1024; #ifdef CONFIG_GK20A_CTXSW_TRACE g->gr.ctx_vars.fecs_trace_buffer_size = nvgpu_gr_fecs_trace_buffer_size(g); @@ -447,9 +451,19 @@ cleanup: return err; } -int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, - struct channel_gk20a *c, u64 zcull_va, - u32 mode) +static int vgpu_gr_init_gr_zcull(struct gk20a *g, struct gr_gk20a *gr) +{ + nvgpu_log_fn(g, " "); + /* Zero-allocate gr->zcull; no other field is initialized here. Returns 0, or -ENOMEM if the allocation fails. */ + gr->zcull = nvgpu_kzalloc(g, sizeof(*gr->zcull)); + if (gr->zcull == NULL) { + return -ENOMEM; + } + + return 0; +} +int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct channel_gk20a *c, + u64 zcull_va, u32 mode) { struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_zcull_bind_params *p = &msg.params.zcull_bind; @@ -467,8 +481,10 @@ int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, return (err || msg.ret) ? 
-ENOMEM : 0; } -int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, - struct gr_zcull_info *zcull_params) +int vgpu_gr_get_zcull_info(struct gk20a *g, + struct nvgpu_gr_config *gr_config, + struct nvgpu_gr_zcull *zcull, + struct nvgpu_gr_zcull_info *zcull_params) { struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_zcull_info_params *p = &msg.params.zcull_info; @@ -648,6 +664,8 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr) nvgpu_gr_config_deinit(gr->g, gr->config); + nvgpu_gr_zcull_deinit(gr->g, gr->zcull); + nvgpu_kfree(gr->g, gr->sm_to_cluster); gr->sm_to_cluster = NULL; @@ -678,6 +696,11 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) goto clean_up; } + err = vgpu_gr_init_gr_zcull(g, gr); + if (err) { + goto clean_up; + } + err = g->ops.gr.init_ctx_state(g); if (err) { goto clean_up; diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h index de08d3b73..c10141077 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h @@ -28,7 +28,7 @@ struct gk20a; struct channel_gk20a; struct gr_gk20a; -struct gr_zcull_info; +struct nvgpu_gr_zcull_info; struct nvgpu_gr_zbc; struct nvgpu_gr_zbc_entry; struct nvgpu_gr_zbc_query_params; @@ -36,6 +36,7 @@ struct dbg_session_gk20a; struct tsg_gk20a; struct vm_gk20a; struct nvgpu_gr_ctx; +struct nvgpu_gr_zcull; void vgpu_gr_detect_sm_arch(struct gk20a *g); int vgpu_gr_init_ctx_state(struct gk20a *g); @@ -43,11 +44,12 @@ int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g); void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); void vgpu_gr_free_tsg_ctx(struct tsg_gk20a *tsg); int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); -int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, - struct channel_gk20a *c, u64 zcull_va, - u32 mode); -int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, - struct gr_zcull_info *zcull_params); +int 
vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct channel_gk20a *c, + u64 zcull_va, u32 mode); +int vgpu_gr_get_zcull_info(struct gk20a *g, + struct nvgpu_gr_config *gr_config, + struct nvgpu_gr_zcull *zcull, + struct nvgpu_gr_zcull_info *zcull_params); u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config, u32 gpc_index); u32 vgpu_gr_get_max_fbps_count(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index f5c50752e..d6c28d1bc 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -150,8 +150,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .load_ctxsw_ucode = NULL, .set_gpc_tpc_mask = NULL, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, - .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, - .get_zcull_info = vgpu_gr_get_zcull_info, .is_tpc_addr = gr_gm20b_is_tpc_addr, .get_tpc_num = gr_gm20b_get_tpc_num, .detect_sm_arch = vgpu_gr_detect_sm_arch, @@ -182,7 +180,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .resume_contexts = vgpu_gr_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, .init_sm_id_table = vgpu_gr_init_sm_id_table, - .program_zcull_mapping = NULL, .commit_inst = vgpu_gr_commit_inst, .trigger_suspend = NULL, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -339,6 +336,11 @@ static const struct gpu_ops vgpu_gv11b_ops = { .get_gpcs_swdx_dss_zbc_c_format_reg = NULL, .get_gpcs_swdx_dss_zbc_z_format_reg = NULL, }, + .zcull = { + .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, + .get_zcull_info = vgpu_gr_get_zcull_info, + .program_zcull_mapping = NULL, + }, .hwpm_map = { .align_regs_perf_pma = gv100_gr_hwpm_map_align_regs_perf_pma, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index c6f7b2f11..efab3dacc 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -51,6 +51,7 @@ 
#include #include #include +#include #include #include #include @@ -619,39 +620,6 @@ int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, return ret; } -static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, - struct nvgpu_gr_ctx *gr_ctx) -{ - int ret = 0; - - nvgpu_log_fn(g, " "); - - ret = gk20a_disable_channel_tsg(g, c); - if (ret != 0) { - nvgpu_err(g, "failed to disable channel/TSG"); - return ret; - } - ret = gk20a_fifo_preempt(g, c); - if (ret != 0) { - gk20a_enable_channel_tsg(g, c); - nvgpu_err(g, "failed to preempt channel/TSG"); - return ret; - } - - if (c->subctx != NULL) { - ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, false); - if (ret == 0) { - nvgpu_gr_subctx_zcull_setup(g, c->subctx, gr_ctx); - } - } else { - ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, true); - } - - gk20a_enable_channel_tsg(g, c); - - return ret; -} - u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc) { u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); @@ -1606,15 +1574,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) "query golden image size failed"); return ret; } - op.method.addr = - gr_fecs_method_push_adr_discover_zcull_image_size_v(); - op.mailbox.ret = &g->gr.ctx_vars.zcull_ctxsw_image_size; - ret = gr_gk20a_submit_fecs_method_op(g, op, false); - if (ret != 0) { - nvgpu_err(g, - "query zcull ctx image size failed"); - return ret; - } + op.method.addr = gr_fecs_method_push_adr_discover_pm_image_size_v(); op.mailbox.ret = &g->gr.ctx_vars.pm_ctxsw_image_size; @@ -1943,6 +1903,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) nvgpu_ecc_remove_support(g); nvgpu_gr_zbc_deinit(g, gr->zbc); + nvgpu_gr_zcull_deinit(g, gr->zcull); } static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) @@ -2012,307 +1973,6 @@ clean_up: return -ENOMEM; } -static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr) -{ - struct gr_zcull_gk20a *zcull = &gr->zcull; - - zcull->aliquot_width = nvgpu_gr_config_get_tpc_count(gr->config) * 
16U; - zcull->aliquot_height = 16; - - zcull->width_align_pixels = nvgpu_gr_config_get_tpc_count(gr->config) * 16U; - zcull->height_align_pixels = 32; - - zcull->aliquot_size = - zcull->aliquot_width * zcull->aliquot_height; - - /* assume no floor sweeping since we only have 1 tpc in 1 gpc */ - zcull->pixel_squares_by_aliquots = - nvgpu_gr_config_get_zcb_count(gr->config) * 16U * 16U * - nvgpu_gr_config_get_tpc_count(gr->config) / - (nvgpu_gr_config_get_gpc_count(gr->config) * - nvgpu_gr_config_get_gpc_tpc_count(gr->config, 0U)); - - zcull->total_aliquots = - gr_gpc0_zcull_total_ram_size_num_aliquots_f( - gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r())); - - return 0; -} - -u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr) -{ - /* assuming gr has already been initialized */ - return gr->ctx_vars.zcull_ctxsw_image_size; -} - -int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, - struct channel_gk20a *c, u64 zcull_va, u32 mode) -{ - struct tsg_gk20a *tsg; - struct nvgpu_gr_ctx *gr_ctx; - - tsg = tsg_gk20a_from_ch(c); - if (tsg == NULL) { - return -EINVAL; - } - - gr_ctx = tsg->gr_ctx; - nvgpu_gr_ctx_set_zcull_ctx(g, gr_ctx, mode, zcull_va); - - /* TBD: don't disable channel in sw method processing */ - return gr_gk20a_ctx_zcull_setup(g, c, gr_ctx); -} - -int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, - struct gr_zcull_info *zcull_params) -{ - struct gr_zcull_gk20a *zcull = &gr->zcull; - - zcull_params->width_align_pixels = zcull->width_align_pixels; - zcull_params->height_align_pixels = zcull->height_align_pixels; - zcull_params->pixel_squares_by_aliquots = - zcull->pixel_squares_by_aliquots; - zcull_params->aliquot_total = zcull->total_aliquots; - - zcull_params->region_byte_multiplier = - nvgpu_gr_config_get_gpc_count(gr->config) * - gr_zcull_bytes_per_aliquot_per_gpu_v(); - zcull_params->region_header_size = - nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) * - gr_zcull_save_restore_header_bytes_per_gpc_v(); - 
- zcull_params->subregion_header_size = - nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) * - gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(); - - zcull_params->subregion_width_align_pixels = - nvgpu_gr_config_get_tpc_count(gr->config) * - gr_gpc0_zcull_zcsize_width_subregion__multiple_v(); - zcull_params->subregion_height_align_pixels = - gr_gpc0_zcull_zcsize_height_subregion__multiple_v(); - zcull_params->subregion_count = gr_zcull_subregion_qty_v(); - - return 0; -} - -void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, - u32 *zcull_map_tiles) -{ - u32 val; - - nvgpu_log_fn(g, " "); - - if (zcull_num_entries >= 8U) { - nvgpu_log_fn(g, "map0"); - val = - gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f( - zcull_map_tiles[0]) | - gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f( - zcull_map_tiles[1]) | - gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f( - zcull_map_tiles[2]) | - gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f( - zcull_map_tiles[3]) | - gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f( - zcull_map_tiles[4]) | - gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f( - zcull_map_tiles[5]) | - gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f( - zcull_map_tiles[6]) | - gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f( - zcull_map_tiles[7]); - - gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), val); - } - - if (zcull_num_entries >= 16U) { - nvgpu_log_fn(g, "map1"); - val = - gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f( - zcull_map_tiles[8]) | - gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f( - zcull_map_tiles[9]) | - gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f( - zcull_map_tiles[10]) | - gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f( - zcull_map_tiles[11]) | - gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f( - zcull_map_tiles[12]) | - gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f( - zcull_map_tiles[13]) | - gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f( - zcull_map_tiles[14]) | - gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f( - zcull_map_tiles[15]); - - 
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(), val); - } - - if (zcull_num_entries >= 24U) { - nvgpu_log_fn(g, "map2"); - val = - gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f( - zcull_map_tiles[16]) | - gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f( - zcull_map_tiles[17]) | - gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f( - zcull_map_tiles[18]) | - gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f( - zcull_map_tiles[19]) | - gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f( - zcull_map_tiles[20]) | - gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f( - zcull_map_tiles[21]) | - gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f( - zcull_map_tiles[22]) | - gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f( - zcull_map_tiles[23]); - - gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(), val); - } - - if (zcull_num_entries >= 32U) { - nvgpu_log_fn(g, "map3"); - val = - gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f( - zcull_map_tiles[24]) | - gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f( - zcull_map_tiles[25]) | - gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f( - zcull_map_tiles[26]) | - gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f( - zcull_map_tiles[27]) | - gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f( - zcull_map_tiles[28]) | - gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f( - zcull_map_tiles[29]) | - gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f( - zcull_map_tiles[30]) | - gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f( - zcull_map_tiles[31]); - - gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(), val); - } - -} - -static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) -{ - u32 gpc_index, gpc_tpc_count, gpc_zcull_count; - u32 *zcull_map_tiles, *zcull_bank_counters; - u32 map_counter; - u32 rcp_conserv; - u32 offset; - bool floorsweep = false; - u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); - u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); - u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, - GPU_LIT_NUM_TPC_PER_GPC); - 
u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc; - u32 map_tile_count; - - if (gr->config->map_tiles == NULL) { - return -1; - } - - if (zcull_alloc_num % 8U != 0U) { - /* Total 8 fields per map reg i.e. tile_0 to tile_7*/ - zcull_alloc_num += (zcull_alloc_num % 8U); - } - zcull_map_tiles = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32)); - - if (zcull_map_tiles == NULL) { - nvgpu_err(g, - "failed to allocate zcull map titles"); - return -ENOMEM; - } - - zcull_bank_counters = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32)); - - if (zcull_bank_counters == NULL) { - nvgpu_err(g, - "failed to allocate zcull bank counters"); - nvgpu_kfree(g, zcull_map_tiles); - return -ENOMEM; - } - - for (map_counter = 0; - map_counter < nvgpu_gr_config_get_tpc_count(gr->config); - map_counter++) { - map_tile_count = nvgpu_gr_config_get_map_tile_count(gr->config, map_counter); - zcull_map_tiles[map_counter] = - zcull_bank_counters[map_tile_count]; - zcull_bank_counters[map_tile_count]++; - } - - if (g->ops.gr.program_zcull_mapping != NULL) { - g->ops.gr.program_zcull_mapping(g, zcull_alloc_num, - zcull_map_tiles); - } - - nvgpu_kfree(g, zcull_map_tiles); - nvgpu_kfree(g, zcull_bank_counters); - - for (gpc_index = 0; - gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); - gpc_index++) { - gpc_tpc_count = nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_index); - gpc_zcull_count = nvgpu_gr_config_get_gpc_zcb_count(gr->config, gpc_index); - - if (gpc_zcull_count != - nvgpu_gr_config_get_max_zcull_per_gpc_count(gr->config) && - gpc_zcull_count < gpc_tpc_count) { - nvgpu_err(g, - "zcull_banks (%d) less than tpcs (%d) for gpc (%d)", - gpc_zcull_count, gpc_tpc_count, gpc_index); - return -EINVAL; - } - if (gpc_zcull_count != - nvgpu_gr_config_get_max_zcull_per_gpc_count(gr->config) && - gpc_zcull_count != 0U) { - floorsweep = true; - } - } - - /* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */ - rcp_conserv = 
DIV_ROUND_UP(gr_gpc0_zcull_sm_num_rcp_conservative__max_v(), - nvgpu_gr_config_get_gpc_tpc_count(gr->config, 0U)); - - for (gpc_index = 0; - gpc_index < nvgpu_gr_config_get_gpc_count(gr->config); - gpc_index++) { - offset = gpc_index * gpc_stride; - - if (floorsweep) { - gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, - gr_gpc0_zcull_ram_addr_row_offset_f( - nvgpu_gr_config_get_map_row_offset(gr->config)) | - gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f( - nvgpu_gr_config_get_max_zcull_per_gpc_count(gr->config))); - } else { - gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, - gr_gpc0_zcull_ram_addr_row_offset_f( - nvgpu_gr_config_get_map_row_offset(gr->config)) | - gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f( - nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_index))); - } - - gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset, - gr_gpc0_zcull_fs_num_active_banks_f( - nvgpu_gr_config_get_gpc_zcb_count(gr->config, gpc_index)) | - gr_gpc0_zcull_fs_num_sms_f( - nvgpu_gr_config_get_tpc_count(gr->config))); - - gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset, - gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv)); - } - - gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(), - gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv)); - - return 0; -} - void gr_gk20a_enable_hww_exceptions(struct gk20a *g) { /* enable exceptions */ @@ -2352,7 +2012,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f()); gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data); - gr_gk20a_zcull_init_hw(g, gr); + nvgpu_gr_zcull_init_hw(g, gr->zcull, gr->config); if (g->ops.priv_ring.set_ppriv_timeout_settings != NULL) { g->ops.priv_ring.set_ppriv_timeout_settings(g); @@ -2595,7 +2255,7 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) goto clean_up; } - err = gr_gk20a_init_zcull(g, gr); + err = nvgpu_gr_zcull_init(g, &gr->zcull); if (err != 0) { goto clean_up; } diff --git 
a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index aec285912..19065702d 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -109,30 +109,6 @@ struct gr_channel_map_tlb_entry { u32 tsgid; }; -struct gr_zcull_gk20a { - u32 aliquot_width; - u32 aliquot_height; - u32 aliquot_size; - u32 total_aliquots; - - u32 width_align_pixels; - u32 height_align_pixels; - u32 pixel_squares_by_aliquots; -}; - -struct gr_zcull_info { - u32 width_align_pixels; - u32 height_align_pixels; - u32 pixel_squares_by_aliquots; - u32 aliquot_total; - u32 region_byte_multiplier; - u32 region_header_size; - u32 subregion_header_size; - u32 subregion_width_align_pixels; - u32 subregion_height_align_pixels; - u32 subregion_count; -}; - struct sm_info { u32 gpc_index; u32 tpc_index; @@ -176,8 +152,6 @@ struct gr_gk20a { bool golden_image_initialized; u32 golden_image_size; - u32 zcull_ctxsw_image_size; - u32 pm_ctxsw_image_size; u32 buffer_header_size; @@ -213,7 +187,7 @@ struct gr_gk20a { struct nvgpu_gr_hwpm_map *hwpm_map; - struct gr_zcull_gk20a zcull; + struct nvgpu_gr_zcull *zcull; struct nvgpu_gr_zbc *zbc; @@ -344,14 +318,6 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); int gk20a_gr_isr(struct gk20a *g); u32 gk20a_gr_nonstall_isr(struct gk20a *g); -/* zcull */ -u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr); -int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, - struct channel_gk20a *c, u64 zcull_va, u32 mode); -int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, - struct gr_zcull_info *zcull_params); -void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, - u32 *zcull_map_tiles); /* pmu */ int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size); int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 
7930d715c..059486c85 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "hal/bus/bus_gm20b.h" #include "hal/bus/bus_gk20a.h" @@ -53,6 +54,7 @@ #include "hal/fifo/pbdma_status_gm20b.h" #include "hal/fifo/userd_gk20a.h" #include "hal/gr/zbc/zbc_gm20b.h" +#include "hal/gr/zcull/zcull_gm20b.h" #include "hal/gr/init/gr_init_gm20b.h" #include "hal/gr/intr/gr_intr_gm20b.h" #include "hal/falcon/falcon_gk20a.h" @@ -252,8 +254,6 @@ static const struct gpu_ops gm20b_ops = { .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, - .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, - .get_zcull_info = gr_gk20a_get_zcull_info, .is_tpc_addr = gr_gm20b_is_tpc_addr, .get_tpc_num = gr_gm20b_get_tpc_num, .detect_sm_arch = gr_gm20b_detect_sm_arch, @@ -284,7 +284,6 @@ static const struct gpu_ops gm20b_ops = { .resume_contexts = gr_gk20a_resume_contexts, .get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags, .init_sm_id_table = gr_gk20a_init_sm_id_table, - .program_zcull_mapping = gr_gk20a_program_zcull_mapping, .commit_inst = gr_gk20a_commit_inst, .trigger_suspend = gr_gk20a_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -408,6 +407,12 @@ static const struct gpu_ops gm20b_ops = { .get_gpcs_swdx_dss_zbc_c_format_reg = NULL, .get_gpcs_swdx_dss_zbc_z_format_reg = NULL, }, + .zcull = { + .init_zcull_hw = gm20b_gr_init_zcull_hw, + .bind_ctxsw_zcull = gm20b_gr_bind_ctxsw_zcull, + .get_zcull_info = gm20b_gr_get_zcull_info, + .program_zcull_mapping = gm20b_gr_program_zcull_mapping, + }, .init = { .fifo_access = gm20b_gr_init_fifo_access, .get_access_map = gm20b_gr_init_get_access_map, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 3854882e2..a1a94d402 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -37,6 
+37,7 @@ #include #include #include +#include #include #include "hal/bus/bus_gk20a.h" @@ -64,6 +65,7 @@ #include "hal/gr/fecs_trace/fecs_trace_gm20b.h" #include "hal/gr/fecs_trace/fecs_trace_gp10b.h" #include "hal/gr/zbc/zbc_gp10b.h" +#include "hal/gr/zcull/zcull_gm20b.h" #include "hal/gr/init/gr_init_gm20b.h" #include "hal/gr/init/gr_init_gp10b.h" #include "hal/gr/intr/gr_intr_gm20b.h" @@ -275,8 +277,6 @@ static const struct gpu_ops gp10b_ops = { .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, - .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, - .get_zcull_info = gr_gk20a_get_zcull_info, .is_tpc_addr = gr_gm20b_is_tpc_addr, .get_tpc_num = gr_gm20b_get_tpc_num, .detect_sm_arch = gr_gm20b_detect_sm_arch, @@ -307,7 +307,6 @@ static const struct gpu_ops gp10b_ops = { .resume_contexts = gr_gk20a_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, .init_sm_id_table = gr_gk20a_init_sm_id_table, - .program_zcull_mapping = gr_gk20a_program_zcull_mapping, .commit_inst = gr_gk20a_commit_inst, .trigger_suspend = gr_gk20a_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -479,6 +478,12 @@ static const struct gpu_ops gp10b_ops = { .get_gpcs_swdx_dss_zbc_z_format_reg = gp10b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg, }, + .zcull = { + .init_zcull_hw = gm20b_gr_init_zcull_hw, + .bind_ctxsw_zcull = gm20b_gr_bind_ctxsw_zcull, + .get_zcull_info = gm20b_gr_get_zcull_info, + .program_zcull_mapping = gm20b_gr_program_zcull_mapping, + }, .init = { .fifo_access = gm20b_gr_init_fifo_access, .get_access_map = gp10b_gr_init_get_access_map, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 745c2f834..01f9789cd 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -62,6 +62,8 @@ #include "hal/gr/init/gr_init_gv11b.h" #include "hal/gr/intr/gr_intr_gm20b.h" #include 
"hal/gr/intr/gr_intr_gv11b.h" +#include "hal/gr/zcull/zcull_gm20b.h" +#include "hal/gr/zcull/zcull_gv11b.h" #include "hal/gr/hwpm_map/hwpm_map_gv100.h" #include "hal/falcon/falcon_gk20a.h" #include "hal/gsp/gsp_gv100.h" @@ -154,6 +156,7 @@ #include #include #include +#include #include #include @@ -384,8 +387,6 @@ static const struct gpu_ops gv100_ops = { .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, - .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, - .get_zcull_info = gr_gk20a_get_zcull_info, .is_tpc_addr = gr_gm20b_is_tpc_addr, .get_tpc_num = gr_gm20b_get_tpc_num, .detect_sm_arch = gr_gv11b_detect_sm_arch, @@ -419,7 +420,6 @@ static const struct gpu_ops gv100_ops = { .resume_contexts = gr_gk20a_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, .init_sm_id_table = gr_gv100_init_sm_id_table, - .program_zcull_mapping = gr_gv11b_program_zcull_mapping, .commit_inst = gr_gv11b_commit_inst, .trigger_suspend = gv11b_gr_sm_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -614,6 +614,12 @@ static const struct gpu_ops gv100_ops = { .get_gpcs_swdx_dss_zbc_z_format_reg = gv11b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg, }, + .zcull = { + .init_zcull_hw = gm20b_gr_init_zcull_hw, + .bind_ctxsw_zcull = gm20b_gr_bind_ctxsw_zcull, + .get_zcull_info = gm20b_gr_get_zcull_info, + .program_zcull_mapping = gv11b_gr_program_zcull_mapping, + }, .hwpm_map = { .align_regs_perf_pma = gv100_gr_hwpm_map_align_regs_perf_pma, diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 242612607..1d6062e5d 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -2269,36 +2269,6 @@ int gr_gv11b_init_sw_veid_bundle(struct gk20a *g) return err; } -void gr_gv11b_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, - u32 *zcull_map_tiles) -{ - u32 val, i, j; - - nvgpu_log_fn(g, " "); - - for (i = 
0U, j = 0U; i < (zcull_num_entries / 8U); i++, j += 8U) { - val = - gr_gpcs_zcull_sm_in_gpc_number_map_tile_0_f( - zcull_map_tiles[j+0U]) | - gr_gpcs_zcull_sm_in_gpc_number_map_tile_1_f( - zcull_map_tiles[j+1U]) | - gr_gpcs_zcull_sm_in_gpc_number_map_tile_2_f( - zcull_map_tiles[j+2U]) | - gr_gpcs_zcull_sm_in_gpc_number_map_tile_3_f( - zcull_map_tiles[j+3U]) | - gr_gpcs_zcull_sm_in_gpc_number_map_tile_4_f( - zcull_map_tiles[j+4U]) | - gr_gpcs_zcull_sm_in_gpc_number_map_tile_5_f( - zcull_map_tiles[j+5U]) | - gr_gpcs_zcull_sm_in_gpc_number_map_tile_6_f( - zcull_map_tiles[j+6U]) | - gr_gpcs_zcull_sm_in_gpc_number_map_tile_7_f( - zcull_map_tiles[j+7U]); - - gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map_r(i), val); - } -} - void gr_gv11b_detect_sm_arch(struct gk20a *g) { u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index bd3102236..310227fe9 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -71,9 +71,6 @@ struct gk20a_debug_output; #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE U32(0) -/*zcull*/ -void gr_gv11b_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, - u32 *zcull_map_tiles); void gr_gv11b_create_sysfs(struct gk20a *g); void gr_gv11b_remove_sysfs(struct gk20a *g); u32 gr_gv11b_ctxsw_checksum_mismatch_mailbox_val(void); diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index f03c94c30..994df820f 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -57,6 +57,8 @@ #include "hal/gr/fecs_trace/fecs_trace_gv11b.h" #include "hal/gr/zbc/zbc_gp10b.h" #include "hal/gr/zbc/zbc_gv11b.h" +#include "hal/gr/zcull/zcull_gm20b.h" +#include "hal/gr/zcull/zcull_gv11b.h" #include "hal/gr/init/gr_init_gm20b.h" #include "hal/gr/init/gr_init_gp10b.h" #include "hal/gr/init/gr_init_gv11b.h" @@ -128,6 +130,7 @@ #include #include #include +#include #include 
#include @@ -334,8 +337,6 @@ static const struct gpu_ops gv11b_ops = { .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, - .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, - .get_zcull_info = gr_gk20a_get_zcull_info, .is_tpc_addr = gr_gm20b_is_tpc_addr, .get_tpc_num = gr_gm20b_get_tpc_num, .detect_sm_arch = gr_gv11b_detect_sm_arch, @@ -370,7 +371,6 @@ static const struct gpu_ops gv11b_ops = { .resume_contexts = gr_gk20a_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, .init_sm_id_table = gr_gv100_init_sm_id_table, - .program_zcull_mapping = gr_gv11b_program_zcull_mapping, .commit_inst = gr_gv11b_commit_inst, .trigger_suspend = gv11b_gr_sm_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -574,6 +574,12 @@ static const struct gpu_ops gv11b_ops = { .get_gpcs_swdx_dss_zbc_z_format_reg = gv11b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg, }, + .zcull = { + .init_zcull_hw = gm20b_gr_init_zcull_hw, + .bind_ctxsw_zcull = gm20b_gr_bind_ctxsw_zcull, + .get_zcull_info = gm20b_gr_get_zcull_info, + .program_zcull_mapping = gv11b_gr_program_zcull_mapping, + }, .hwpm_map = { .align_regs_perf_pma = gv100_gr_hwpm_map_align_regs_perf_pma, diff --git a/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gm20b.c b/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gm20b.c new file mode 100644 index 000000000..c45179584 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gm20b.c @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "zcull_gm20b.h" + +#include + +static int gm20b_gr_init_zcull_ctxsw_image_size(struct gk20a *g, + struct nvgpu_gr_zcull *gr_zcull) +{ + int ret = 0; + struct fecs_method_op_gk20a op = { + .mailbox = { .id = 0U, .data = 0U, + .clr = ~U32(0U), .ok = 0U, .fail = 0U}, + .method.data = 0U, + .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, + .cond.fail = GR_IS_UCODE_OP_SKIP, + }; + + if (!g->gr.ctx_vars.golden_image_initialized) { + op.method.addr = + gr_fecs_method_push_adr_discover_zcull_image_size_v(); + + op.mailbox.ret = &gr_zcull->zcull_ctxsw_image_size; + ret = gr_gk20a_submit_fecs_method_op(g, op, false); + if (ret != 0) { + nvgpu_err(g, + "query zcull ctx image size failed"); + return ret; + } + } + + return ret; +} + +int gm20b_gr_init_zcull_hw(struct gk20a *g, + struct nvgpu_gr_zcull *gr_zcull, + struct nvgpu_gr_config *gr_config) +{ + u32 gpc_index, gpc_tpc_count, gpc_zcull_count; + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + bool floorsweep = false; + u32 rcp_conserv; + u32 offset; + int ret; + + gr_zcull->total_aliquots = + gr_gpc0_zcull_total_ram_size_num_aliquots_f( + nvgpu_readl(g, gr_gpc0_zcull_total_ram_size_r())); + + ret = gm20b_gr_init_zcull_ctxsw_image_size(g, gr_zcull); + if (ret != 0) { + return ret; + } + + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr_config); + gpc_index++) { + gpc_tpc_count = + nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_index); + gpc_zcull_count = + nvgpu_gr_config_get_gpc_zcb_count(gr_config, gpc_index); + + if (gpc_zcull_count != + nvgpu_gr_config_get_max_zcull_per_gpc_count(gr_config) && + gpc_zcull_count < gpc_tpc_count) { + nvgpu_err(g, + "zcull_banks (%d) less than tpcs (%d) for gpc (%d)", + gpc_zcull_count, gpc_tpc_count, gpc_index); + return -EINVAL; + } + if (gpc_zcull_count != + nvgpu_gr_config_get_max_zcull_per_gpc_count(gr_config) && + gpc_zcull_count != 0U) { + floorsweep = true; + } + } 
+ + /* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */ + rcp_conserv = DIV_ROUND_UP(gr_gpc0_zcull_sm_num_rcp_conservative__max_v(), + nvgpu_gr_config_get_gpc_tpc_count(gr_config, 0U)); + + for (gpc_index = 0; + gpc_index < nvgpu_gr_config_get_gpc_count(gr_config); + gpc_index++) { + offset = gpc_index * gpc_stride; + + if (floorsweep) { + nvgpu_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, + gr_gpc0_zcull_ram_addr_row_offset_f( + nvgpu_gr_config_get_map_row_offset(gr_config)) | + gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f( + nvgpu_gr_config_get_max_zcull_per_gpc_count(gr_config))); + } else { + nvgpu_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, + gr_gpc0_zcull_ram_addr_row_offset_f( + nvgpu_gr_config_get_map_row_offset(gr_config)) | + gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f( + nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_index))); + } + + nvgpu_writel(g, gr_gpc0_zcull_fs_r() + offset, + gr_gpc0_zcull_fs_num_active_banks_f( + nvgpu_gr_config_get_gpc_zcb_count(gr_config, gpc_index)) | + gr_gpc0_zcull_fs_num_sms_f( + nvgpu_gr_config_get_tpc_count(gr_config))); + + nvgpu_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset, + gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv)); + } + + nvgpu_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(), + gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv)); + + return 0; +} + +int gm20b_gr_get_zcull_info(struct gk20a *g, + struct nvgpu_gr_config *gr_config, + struct nvgpu_gr_zcull *zcull, + struct nvgpu_gr_zcull_info *zcull_params) +{ + zcull_params->width_align_pixels = zcull->width_align_pixels; + zcull_params->height_align_pixels = zcull->height_align_pixels; + zcull_params->pixel_squares_by_aliquots = + zcull->pixel_squares_by_aliquots; + zcull_params->aliquot_total = zcull->total_aliquots; + + zcull_params->region_byte_multiplier = + nvgpu_gr_config_get_gpc_count(gr_config) * + gr_zcull_bytes_per_aliquot_per_gpu_v(); + zcull_params->region_header_size = + 
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) * + gr_zcull_save_restore_header_bytes_per_gpc_v(); + + zcull_params->subregion_header_size = + nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) * + gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(); + + zcull_params->subregion_width_align_pixels = + nvgpu_gr_config_get_tpc_count(gr_config) * + gr_gpc0_zcull_zcsize_width_subregion__multiple_v(); + zcull_params->subregion_height_align_pixels = + gr_gpc0_zcull_zcsize_height_subregion__multiple_v(); + zcull_params->subregion_count = gr_zcull_subregion_qty_v(); + + return 0; +} + +void gm20b_gr_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, + u32 *zcull_map_tiles) +{ + u32 val; + + nvgpu_log_fn(g, " "); + + if (zcull_num_entries >= 8U) { + nvgpu_log_fn(g, "map0"); + val = + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f( + zcull_map_tiles[0]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f( + zcull_map_tiles[1]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f( + zcull_map_tiles[2]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f( + zcull_map_tiles[3]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f( + zcull_map_tiles[4]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f( + zcull_map_tiles[5]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f( + zcull_map_tiles[6]) | + gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f( + zcull_map_tiles[7]); + + gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), val); + } + + if (zcull_num_entries >= 16U) { + nvgpu_log_fn(g, "map1"); + val = + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f( + zcull_map_tiles[8]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f( + zcull_map_tiles[9]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f( + zcull_map_tiles[10]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f( + zcull_map_tiles[11]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f( + zcull_map_tiles[12]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f( + zcull_map_tiles[13]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f( + 
zcull_map_tiles[14]) | + gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f( + zcull_map_tiles[15]); + + gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(), val); + } + + if (zcull_num_entries >= 24U) { + nvgpu_log_fn(g, "map2"); + val = + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f( + zcull_map_tiles[16]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f( + zcull_map_tiles[17]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f( + zcull_map_tiles[18]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f( + zcull_map_tiles[19]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f( + zcull_map_tiles[20]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f( + zcull_map_tiles[21]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f( + zcull_map_tiles[22]) | + gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f( + zcull_map_tiles[23]); + + gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(), val); + } + + if (zcull_num_entries >= 32U) { + nvgpu_log_fn(g, "map3"); + val = + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f( + zcull_map_tiles[24]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f( + zcull_map_tiles[25]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f( + zcull_map_tiles[26]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f( + zcull_map_tiles[27]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f( + zcull_map_tiles[28]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f( + zcull_map_tiles[29]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f( + zcull_map_tiles[30]) | + gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f( + zcull_map_tiles[31]); + + gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(), val); + } + +} + +static int gm20b_gr_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, + struct nvgpu_gr_ctx *gr_ctx) +{ + int ret = 0; + + nvgpu_log_fn(g, " "); + + ret = gk20a_disable_channel_tsg(g, c); + if (ret != 0) { + nvgpu_err(g, "failed to disable channel/TSG"); + return ret; + } + ret = gk20a_fifo_preempt(g, c); + if (ret != 0) { + if 
(gk20a_enable_channel_tsg(g, c) != 0) { + nvgpu_err(g, "failed to re-enable channel/TSG"); + } + nvgpu_err(g, "failed to preempt channel/TSG"); + return ret; + } + + ret = nvgpu_channel_gr_zcull_setup(g, c, gr_ctx); + if (ret != 0) { + nvgpu_err(g, "failed to set up zcull"); + } + + ret = gk20a_enable_channel_tsg(g, c); + if (ret != 0) { + nvgpu_err(g, "failed to enable channel/TSG"); + } + + return ret; +} + +int gm20b_gr_bind_ctxsw_zcull(struct gk20a *g, struct channel_gk20a *c, + u64 zcull_va, u32 mode) +{ + struct tsg_gk20a *tsg; + struct nvgpu_gr_ctx *gr_ctx; + + tsg = tsg_gk20a_from_ch(c); + if (tsg == NULL) { + return -EINVAL; + } + + gr_ctx = tsg->gr_ctx; + nvgpu_gr_ctx_set_zcull_ctx(g, gr_ctx, mode, zcull_va); + + /* TBD: don't disable channel in sw method processing */ + return gm20b_gr_ctx_zcull_setup(g, c, gr_ctx); +} diff --git a/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gm20b.h b/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gm20b.h new file mode 100644 index 000000000..bf70927a2 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gm20b.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_ZCULL_GM20B_H +#define NVGPU_GR_ZCULL_GM20B_H + +#include + +struct gk20a; +struct nvgpu_gr_config; +struct nvgpu_gr_zcull; +struct nvgpu_gr_zcull_info; +struct channel_gk20a; + +int gm20b_gr_init_zcull_hw(struct gk20a *g, + struct nvgpu_gr_zcull *gr_zcull, + struct nvgpu_gr_config *gr_config); +int gm20b_gr_get_zcull_info(struct gk20a *g, + struct nvgpu_gr_config *gr_config, + struct nvgpu_gr_zcull *zcull, + struct nvgpu_gr_zcull_info *zcull_params); +void gm20b_gr_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, + u32 *zcull_map_tiles); +int gm20b_gr_bind_ctxsw_zcull(struct gk20a *g, struct channel_gk20a *c, + u64 zcull_va, u32 mode); + +#endif /* NVGPU_GR_ZCULL_GM20B_H */ diff --git a/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gv11b.c b/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gv11b.c new file mode 100644 index 000000000..c4fcd824c --- /dev/null +++ b/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gv11b.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "zcull_gv11b.h" + +#include + +void gv11b_gr_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, + u32 *zcull_map_tiles) +{ + u32 val, i, j = 0U; + + nvgpu_log_fn(g, " "); + + for (i = 0U; i < (zcull_num_entries / 8U); i++) { + val = + gr_gpcs_zcull_sm_in_gpc_number_map_tile_0_f( + zcull_map_tiles[j+0U]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_1_f( + zcull_map_tiles[j+1U]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_2_f( + zcull_map_tiles[j+2U]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_3_f( + zcull_map_tiles[j+3U]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_4_f( + zcull_map_tiles[j+4U]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_5_f( + zcull_map_tiles[j+5U]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_6_f( + zcull_map_tiles[j+6U]) | + gr_gpcs_zcull_sm_in_gpc_number_map_tile_7_f( + zcull_map_tiles[j+7U]); + + gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map_r(i), val); + + j += 8U; + } +} + diff --git a/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gv11b.h b/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gv11b.h new file mode 100644 index 000000000..971bccec3 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/gr/zcull/zcull_gv11b.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_GR_ZCULL_GV11B_H +#define NVGPU_GR_ZCULL_GV11B_H + +#include + +struct gk20a; + +void gv11b_gr_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, + u32 *zcull_map_tiles); + +#endif /* NVGPU_GR_ZCULL_GV11B_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 0ba588da9..4be05a3d2 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -38,6 +38,7 @@ struct fifo_profile_gk20a; struct nvgpu_channel_sync; struct nvgpu_gpfifo_userdata; struct nvgpu_gr_subctx; +struct nvgpu_gr_ctx; /* Flags to be passed to nvgpu_channel_setup_bind() */ #define NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR BIT32(0) @@ -474,6 +475,8 @@ int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c, u32 flags, struct nvgpu_channel_fence *fence, struct gk20a_fence **fence_out); +int nvgpu_channel_gr_zcull_setup(struct gk20a *g, struct channel_gk20a *c, + struct nvgpu_gr_ctx *gr_ctx); #ifdef CONFIG_DEBUG_FS void trace_write_pushbuffers(struct channel_gk20a *c, u32 count); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 808dea7ef..a01d3deb8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -68,6 +68,7 @@ struct nvgpu_gr_subctx; struct nvgpu_gr_zbc; struct nvgpu_gr_zbc_entry; struct nvgpu_gr_zbc_query_params; +struct nvgpu_gr_zcull_info; struct nvgpu_channel_hw_state; struct nvgpu_engine_status_info; struct nvgpu_pbdma_status_info; @@ -298,11 +299,6 @@ struct gpu_ops { void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); int (*alloc_obj_ctx)(struct channel_gk20a *c, u32 class_num, u32 flags); - int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr, - struct channel_gk20a *c, u64 zcull_va, - u32 mode); - int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr, - struct gr_zcull_info *zcull_params); int (*decode_egpc_addr)(struct gk20a *g, u32 addr, 
enum ctxsw_addr_type *addr_type, u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags); @@ -419,8 +415,6 @@ struct gpu_ops { int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost); int (*init_sm_id_table)(struct gk20a *g); int (*init_sw_veid_bundle)(struct gk20a *g); - void (*program_zcull_mapping)(struct gk20a *g, - u32 zcull_alloc_num, u32 *zcull_map_tiles); int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va); int (*trigger_suspend)(struct gk20a *g); int (*wait_for_pause)(struct gk20a *g, struct nvgpu_warpstate *w_state); @@ -645,6 +639,23 @@ struct gpu_ops { struct gk20a *g); } zbc; + struct { + int (*init_zcull_hw)(struct gk20a *g, + struct nvgpu_gr_zcull *gr_zcull, + struct nvgpu_gr_config *gr_config); + int (*bind_ctxsw_zcull)(struct gk20a *g, + struct channel_gk20a *c, + u64 zcull_va, + u32 mode); + int (*get_zcull_info)(struct gk20a *g, + struct nvgpu_gr_config *gr_config, + struct nvgpu_gr_zcull *gr_zcull, + struct nvgpu_gr_zcull_info *zcull_params); + void (*program_zcull_mapping)(struct gk20a *g, + u32 zcull_alloc_num, + u32 *zcull_map_tiles); + } zcull; + struct { void (*align_regs_perf_pma)(u32 *offset); u32 (*get_active_fbpa_mask)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h b/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h new file mode 100644 index 000000000..0b2773c94 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_GR_ZCULL_H +#define NVGPU_GR_ZCULL_H + +#include + +struct gk20a; +struct nvgpu_gr_config; + +struct nvgpu_gr_zcull { + struct gk20a *g; + + u32 aliquot_width; + u32 aliquot_height; + u32 aliquot_size; + u32 total_aliquots; + + u32 width_align_pixels; + u32 height_align_pixels; + u32 pixel_squares_by_aliquots; + + u32 zcull_ctxsw_image_size; +}; + +struct nvgpu_gr_zcull_info { + u32 width_align_pixels; + u32 height_align_pixels; + u32 pixel_squares_by_aliquots; + u32 aliquot_total; + u32 region_byte_multiplier; + u32 region_header_size; + u32 subregion_header_size; + u32 subregion_width_align_pixels; + u32 subregion_height_align_pixels; + u32 subregion_count; +}; + +int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull); +void nvgpu_gr_zcull_deinit(struct gk20a *g, struct nvgpu_gr_zcull *gr_zcull); + +u32 nvgpu_gr_get_ctxsw_zcull_size(struct gk20a *g, + struct nvgpu_gr_zcull *gr_zcull); +int nvgpu_gr_zcull_init_hw(struct gk20a *g, + struct nvgpu_gr_zcull *gr_zcull, + struct nvgpu_gr_config *gr_config); + +#endif /* NVGPU_GR_ZCULL_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c index 687d5d015..b658d95bb 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c @@ -753,7 +753,7 @@ static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, nvgpu_log_fn(gr->g, " "); - return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, + return g->ops.gr.zcull.bind_ctxsw_zcull(g, ch, args->gpu_va, args->mode); } diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index afd157813..bc375f84e 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1632,7 +1633,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg struct 
nvgpu_gpu_zbc_set_table_args *set_table_args; struct nvgpu_gpu_zbc_query_table_args *query_table_args; u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE]; - struct gr_zcull_info *zcull_info; + struct nvgpu_gr_zcull_info *zcull_info; struct nvgpu_gr_zbc_entry *zbc_val; struct nvgpu_gr_zbc_query_params *zbc_tbl; int err = 0; @@ -1665,7 +1666,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE: get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf; - get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr); + get_ctx_size_args->size = nvgpu_gr_get_ctxsw_zcull_size(g, g->gr.zcull); break; case NVGPU_GPU_IOCTL_ZCULL_GET_INFO: @@ -1674,11 +1675,12 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg (void) memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args)); - zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info)); + zcull_info = nvgpu_kzalloc(g, sizeof(*zcull_info)); if (zcull_info == NULL) return -ENOMEM; - err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info); + err = g->ops.gr.zcull.get_zcull_info(g, g->gr.config, + g->gr.zcull, zcull_info); if (err) { nvgpu_kfree(g, zcull_info); break; diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 4d1d3dc64..bd6a6d557 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -60,6 +60,8 @@ #include "hal/gr/fecs_trace/fecs_trace_gv11b.h" #include "hal/gr/zbc/zbc_gp10b.h" #include "hal/gr/zbc/zbc_gv11b.h" +#include "hal/gr/zcull/zcull_gm20b.h" +#include "hal/gr/zcull/zcull_gv11b.h" #include "hal/gr/init/gr_init_gm20b.h" #include "hal/gr/init/gr_init_gp10b.h" #include "hal/gr/init/gr_init_gv11b.h" @@ -405,8 +407,6 @@ static const struct gpu_ops tu104_ops = { .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, - .bind_ctxsw_zcull = 
gr_gk20a_bind_ctxsw_zcull, - .get_zcull_info = gr_gk20a_get_zcull_info, .is_tpc_addr = gr_gm20b_is_tpc_addr, .get_tpc_num = gr_gm20b_get_tpc_num, .detect_sm_arch = gr_gv11b_detect_sm_arch, @@ -440,7 +440,6 @@ static const struct gpu_ops tu104_ops = { .resume_contexts = gr_gk20a_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, .init_sm_id_table = gr_gv100_init_sm_id_table, - .program_zcull_mapping = gr_gv11b_program_zcull_mapping, .commit_inst = gr_gv11b_commit_inst, .trigger_suspend = gv11b_gr_sm_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -643,6 +642,12 @@ static const struct gpu_ops tu104_ops = { .get_gpcs_swdx_dss_zbc_z_format_reg = gv11b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg, }, + .zcull = { + .init_zcull_hw = gm20b_gr_init_zcull_hw, + .bind_ctxsw_zcull = gm20b_gr_bind_ctxsw_zcull, + .get_zcull_info = gm20b_gr_get_zcull_info, + .program_zcull_mapping = gv11b_gr_program_zcull_mapping, + }, .hwpm_map = { .align_regs_perf_pma = gv100_gr_hwpm_map_align_regs_perf_pma,