gpu: nvgpu: create common.gr.zcull

1. Separate the zcull unit out of gr
2. Move zcull HALs from gr to hal.gr.zcull
3. Move common zcull functions to common.gr.zcull

JIRA NVGPU-1883

Change-Id: Icfc297cf3511f957aead01044afc6fd025a04ebb
Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2076547
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Nitin Kumbhar
2019-03-19 16:38:49 +05:30
committed by mobile promotions
parent 863ab23445
commit 30eea4ff2b
26 changed files with 831 additions and 459 deletions
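The net effect of the moves above is that the zcull entry points leave the flat gr ops and the gr_gk20a struct and become a g->ops.gr.zcull sub-struct backed by a common nvgpu_gr_zcull object. A rough before/after sketch of a call site, with signatures taken from the hunks below and the surrounding variables assumed:

/* before: flat gr ops, zcull state embedded in struct gr_gk20a */
err = g->ops.gr.bind_ctxsw_zcull(g, &g->gr, ch, zcull_va, mode);
err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info);

/* after: dedicated zcull sub-struct, state held in the common unit */
err = g->ops.gr.zcull.bind_ctxsw_zcull(g, ch, zcull_va, mode);
err = g->ops.gr.zcull.get_zcull_info(g, g->gr.config, g->gr.zcull, zcull_info);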

View File

@@ -77,6 +77,7 @@ nvgpu-y += \
common/gr/global_ctx.o \
common/gr/ctx.o \
common/gr/subctx.o \
common/gr/zcull.o \
common/gr/config/gr_config.o \
common/gr/config/gr_config_gm20b.o \
common/gr/fecs_trace/fecs_trace.o \
@@ -166,6 +167,8 @@ nvgpu-y += \
hal/gr/fecs_trace/fecs_trace_gm20b.o \
hal/gr/fecs_trace/fecs_trace_gp10b.o \
hal/gr/fecs_trace/fecs_trace_gv11b.o \
hal/gr/zcull/zcull_gm20b.o \
hal/gr/zcull/zcull_gv11b.o \
hal/gr/init/gr_init_gm20b.o \
hal/gr/init/gr_init_gp10b.o \
hal/gr/init/gr_init_gv100.o \

View File

@@ -113,6 +113,7 @@ srcs += common/sim.c \
common/gr/global_ctx.c \
common/gr/subctx.c \
common/gr/ctx.c \
common/gr/zcull.c \
common/gr/config/gr_config.c \
common/gr/config/gr_config_gm20b.c \
common/gr/fecs_trace/fecs_trace.c \
@@ -320,6 +321,8 @@ srcs += common/sim.c \
hal/gr/fecs_trace/fecs_trace_gm20b.c \
hal/gr/fecs_trace/fecs_trace_gp10b.c \
hal/gr/fecs_trace/fecs_trace_gv11b.c \
hal/gr/zcull/zcull_gm20b.c \
hal/gr/zcull/zcull_gv11b.c \
hal/gr/init/gr_init_gm20b.c \
hal/gr/init/gr_init_gp10b.c \
hal/gr/init/gr_init_gv100.c \

View File

@@ -44,6 +44,8 @@
#include <nvgpu/log2.h>
#include <nvgpu/ptimer.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/channel.h>
#include <nvgpu/channel_sync.h>
#include <nvgpu/runlist.h>
@@ -213,6 +215,23 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
gk20a_channel_update(ch);
}
int nvgpu_channel_gr_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
struct nvgpu_gr_ctx *gr_ctx)
{
int ret = 0;
if (c->subctx != NULL) {
ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, false);
if (ret == 0) {
nvgpu_gr_subctx_zcull_setup(g, c->subctx, gr_ctx);
}
} else {
ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, true);
}
return ret;
}
void gk20a_channel_set_unserviceable(struct channel_gk20a *ch)
{
nvgpu_spinlock_acquire(&ch->unserviceable_lock);

View File

@@ -0,0 +1,145 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/io.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/config.h>
int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull)
{
struct nvgpu_gr_config *gr_config = g->gr.config;
struct nvgpu_gr_zcull *zcull;
int err = 0;
zcull = nvgpu_kzalloc(g, sizeof(*zcull));
if (zcull == NULL) {
err = -ENOMEM;
goto exit;
}
zcull->g = g;
zcull->aliquot_width = nvgpu_gr_config_get_tpc_count(gr_config) * 16U;
zcull->aliquot_height = 16;
zcull->width_align_pixels =
nvgpu_gr_config_get_tpc_count(gr_config) * 16U;
zcull->height_align_pixels = 32;
zcull->aliquot_size =
zcull->aliquot_width * zcull->aliquot_height;
/* assume no floor sweeping since we only have 1 tpc in 1 gpc */
zcull->pixel_squares_by_aliquots =
nvgpu_gr_config_get_zcb_count(gr_config) * 16U * 16U *
nvgpu_gr_config_get_tpc_count(gr_config) /
(nvgpu_gr_config_get_gpc_count(gr_config) *
nvgpu_gr_config_get_gpc_tpc_count(gr_config, 0U));
exit:
*gr_zcull = zcull;
return err;
}
void nvgpu_gr_zcull_deinit(struct gk20a *g, struct nvgpu_gr_zcull *gr_zcull)
{
nvgpu_kfree(g, gr_zcull);
}
u32 nvgpu_gr_get_ctxsw_zcull_size(struct gk20a *g,
struct nvgpu_gr_zcull *gr_zcull)
{
/* assuming zcull has already been initialized */
return gr_zcull->zcull_ctxsw_image_size;
}
int nvgpu_gr_zcull_init_hw(struct gk20a *g,
struct nvgpu_gr_zcull *gr_zcull,
struct nvgpu_gr_config *gr_config)
{
u32 *zcull_map_tiles, *zcull_bank_counters;
u32 map_counter;
u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
GPU_LIT_NUM_TPC_PER_GPC);
u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc;
u32 map_tile_count;
int ret = 0;
if (gr_config->map_tiles == NULL) {
return -1;
}
if (zcull_alloc_num % 8U != 0U) {
/* Total 8 fields per map reg i.e. tile_0 to tile_7*/
zcull_alloc_num += (zcull_alloc_num % 8U);
}
zcull_map_tiles = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
if (zcull_map_tiles == NULL) {
nvgpu_err(g,
"failed to allocate zcull map titles");
return -ENOMEM;
}
zcull_bank_counters = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
if (zcull_bank_counters == NULL) {
nvgpu_err(g,
"failed to allocate zcull bank counters");
nvgpu_kfree(g, zcull_map_tiles);
return -ENOMEM;
}
for (map_counter = 0;
map_counter < nvgpu_gr_config_get_tpc_count(gr_config);
map_counter++) {
map_tile_count =
nvgpu_gr_config_get_map_tile_count(gr_config,
map_counter);
zcull_map_tiles[map_counter] =
zcull_bank_counters[map_tile_count];
zcull_bank_counters[map_tile_count]++;
}
if (g->ops.gr.zcull.program_zcull_mapping != NULL) {
g->ops.gr.zcull.program_zcull_mapping(g, zcull_alloc_num,
zcull_map_tiles);
}
nvgpu_kfree(g, zcull_map_tiles);
nvgpu_kfree(g, zcull_bank_counters);
if (g->ops.gr.zcull.init_zcull_hw != NULL) {
ret = g->ops.gr.zcull.init_zcull_hw(g, gr_zcull, gr_config);
if (ret != 0) {
nvgpu_err(g, "failed to init zcull hw. err:%d", ret);
return ret;
}
}
return 0;
}
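This common unit owns the zcull software state and the generic part of hardware bring-up, while chip specifics stay behind g->ops.gr.zcull. A condensed sketch of how the gr setup/teardown paths changed later in this commit drive it (error handling abbreviated, surrounding gr code assumed):

/* sw setup (gk20a_init_gr_setup_sw): allocate the common zcull state */
err = nvgpu_gr_zcull_init(g, &gr->zcull);

/* hw setup (gk20a_init_gr_setup_hw): program the zcull map and per-GPC registers */
nvgpu_gr_zcull_init_hw(g, gr->zcull, gr->config);

/* teardown (gk20a_remove_gr_support): free the common zcull state */
nvgpu_gr_zcull_deinit(g, gr->zcull);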

View File

@@ -131,8 +131,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.load_ctxsw_ucode = NULL,
.set_gpc_tpc_mask = NULL,
.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
.get_zcull_info = vgpu_gr_get_zcull_info,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = vgpu_gr_detect_sm_arch,
@@ -163,7 +161,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.resume_contexts = vgpu_gr_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = vgpu_gr_init_sm_id_table,
.program_zcull_mapping = NULL,
.commit_inst = vgpu_gr_commit_inst,
.trigger_suspend = NULL,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -294,6 +291,11 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.get_gpcs_swdx_dss_zbc_c_format_reg = NULL,
.get_gpcs_swdx_dss_zbc_z_format_reg = NULL,
},
.zcull = {
.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
.get_zcull_info = vgpu_gr_get_zcull_info,
.program_zcull_mapping = NULL,
},
#ifdef CONFIG_GK20A_CTXSW_TRACE
.fecs_trace = {
.alloc_user_buffer = vgpu_alloc_user_buffer,

View File

@@ -38,6 +38,7 @@
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/cyclestats_snapshot.h>
#include <nvgpu/power_features/pg.h>
@@ -121,14 +122,17 @@ int vgpu_gr_init_ctx_state(struct gk20a *g)
nvgpu_log_fn(g, " ");
g->gr.ctx_vars.golden_image_size = priv->constants.golden_ctx_size;
g->gr.ctx_vars.zcull_ctxsw_image_size = priv->constants.zcull_ctx_size;
g->gr.ctx_vars.pm_ctxsw_image_size = priv->constants.hwpm_ctx_size;
if (!g->gr.ctx_vars.golden_image_size ||
!g->gr.ctx_vars.zcull_ctxsw_image_size ||
!g->gr.ctx_vars.pm_ctxsw_image_size) {
return -ENXIO;
}
g->gr.zcull->zcull_ctxsw_image_size = priv->constants.zcull_ctx_size;
if (g->gr.zcull->zcull_ctxsw_image_size == 0U) {
return -ENXIO;
}
g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
#ifdef CONFIG_GK20A_CTXSW_TRACE
g->gr.ctx_vars.fecs_trace_buffer_size = nvgpu_gr_fecs_trace_buffer_size(g);
@@ -447,9 +451,19 @@ cleanup:
return err;
}
int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
struct channel_gk20a *c, u64 zcull_va,
u32 mode)
static int vgpu_gr_init_gr_zcull(struct gk20a *g, struct gr_gk20a *gr)
{
nvgpu_log_fn(g, " ");
gr->zcull = nvgpu_kzalloc(g, sizeof(*gr->zcull));
if (gr->zcull == NULL) {
return -ENOMEM;
}
return 0;
}
int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct channel_gk20a *c,
u64 zcull_va, u32 mode)
{
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_zcull_bind_params *p = &msg.params.zcull_bind;
@@ -467,8 +481,10 @@ int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
return (err || msg.ret) ? -ENOMEM : 0;
}
int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
struct gr_zcull_info *zcull_params)
int vgpu_gr_get_zcull_info(struct gk20a *g,
struct nvgpu_gr_config *gr_config,
struct nvgpu_gr_zcull *zcull,
struct nvgpu_gr_zcull_info *zcull_params)
{
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_zcull_info_params *p = &msg.params.zcull_info;
@@ -648,6 +664,8 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr)
nvgpu_gr_config_deinit(gr->g, gr->config);
nvgpu_gr_zcull_deinit(gr->g, gr->zcull);
nvgpu_kfree(gr->g, gr->sm_to_cluster);
gr->sm_to_cluster = NULL;
@@ -678,6 +696,11 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
goto clean_up;
}
err = vgpu_gr_init_gr_zcull(g, gr);
if (err) {
goto clean_up;
}
err = g->ops.gr.init_ctx_state(g);
if (err) {
goto clean_up;

View File

@@ -28,7 +28,7 @@
struct gk20a;
struct channel_gk20a;
struct gr_gk20a;
struct gr_zcull_info;
struct nvgpu_gr_zcull_info;
struct nvgpu_gr_zbc;
struct nvgpu_gr_zbc_entry;
struct nvgpu_gr_zbc_query_params;
@@ -36,6 +36,7 @@ struct dbg_session_gk20a;
struct tsg_gk20a;
struct vm_gk20a;
struct nvgpu_gr_ctx;
struct nvgpu_gr_zcull;
void vgpu_gr_detect_sm_arch(struct gk20a *g);
int vgpu_gr_init_ctx_state(struct gk20a *g);
@@ -43,11 +44,12 @@ int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g);
void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg);
void vgpu_gr_free_tsg_ctx(struct tsg_gk20a *tsg);
int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
struct channel_gk20a *c, u64 zcull_va,
u32 mode);
int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
struct gr_zcull_info *zcull_params);
int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct channel_gk20a *c,
u64 zcull_va, u32 mode);
int vgpu_gr_get_zcull_info(struct gk20a *g,
struct nvgpu_gr_config *gr_config,
struct nvgpu_gr_zcull *zcull,
struct nvgpu_gr_zcull_info *zcull_params);
u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config,
u32 gpc_index);
u32 vgpu_gr_get_max_fbps_count(struct gk20a *g);

View File

@@ -150,8 +150,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.load_ctxsw_ucode = NULL,
.set_gpc_tpc_mask = NULL,
.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
.get_zcull_info = vgpu_gr_get_zcull_info,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = vgpu_gr_detect_sm_arch,
@@ -182,7 +180,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.resume_contexts = vgpu_gr_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = vgpu_gr_init_sm_id_table,
.program_zcull_mapping = NULL,
.commit_inst = vgpu_gr_commit_inst,
.trigger_suspend = NULL,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -339,6 +336,11 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.get_gpcs_swdx_dss_zbc_c_format_reg = NULL,
.get_gpcs_swdx_dss_zbc_z_format_reg = NULL,
},
.zcull = {
.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
.get_zcull_info = vgpu_gr_get_zcull_info,
.program_zcull_mapping = NULL,
},
.hwpm_map = {
.align_regs_perf_pma =
gv100_gr_hwpm_map_align_regs_perf_pma,

View File

@@ -51,6 +51,7 @@
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/gr/hwpm_map.h>
@@ -619,39 +620,6 @@ int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
return ret;
}
static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
struct nvgpu_gr_ctx *gr_ctx)
{
int ret = 0;
nvgpu_log_fn(g, " ");
ret = gk20a_disable_channel_tsg(g, c);
if (ret != 0) {
nvgpu_err(g, "failed to disable channel/TSG");
return ret;
}
ret = gk20a_fifo_preempt(g, c);
if (ret != 0) {
gk20a_enable_channel_tsg(g, c);
nvgpu_err(g, "failed to preempt channel/TSG");
return ret;
}
if (c->subctx != NULL) {
ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, false);
if (ret == 0) {
nvgpu_gr_subctx_zcull_setup(g, c->subctx, gr_ctx);
}
} else {
ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, true);
}
gk20a_enable_channel_tsg(g, c);
return ret;
}
u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc)
{
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
@@ -1606,15 +1574,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
"query golden image size failed");
return ret;
}
op.method.addr =
gr_fecs_method_push_adr_discover_zcull_image_size_v();
op.mailbox.ret = &g->gr.ctx_vars.zcull_ctxsw_image_size;
ret = gr_gk20a_submit_fecs_method_op(g, op, false);
if (ret != 0) {
nvgpu_err(g,
"query zcull ctx image size failed");
return ret;
}
op.method.addr =
gr_fecs_method_push_adr_discover_pm_image_size_v();
op.mailbox.ret = &g->gr.ctx_vars.pm_ctxsw_image_size;
@@ -1943,6 +1903,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
nvgpu_ecc_remove_support(g);
nvgpu_gr_zbc_deinit(g, gr->zbc);
nvgpu_gr_zcull_deinit(g, gr->zcull);
}
static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
@@ -2012,307 +1973,6 @@ clean_up:
return -ENOMEM;
}
static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr)
{
struct gr_zcull_gk20a *zcull = &gr->zcull;
zcull->aliquot_width = nvgpu_gr_config_get_tpc_count(gr->config) * 16U;
zcull->aliquot_height = 16;
zcull->width_align_pixels = nvgpu_gr_config_get_tpc_count(gr->config) * 16U;
zcull->height_align_pixels = 32;
zcull->aliquot_size =
zcull->aliquot_width * zcull->aliquot_height;
/* assume no floor sweeping since we only have 1 tpc in 1 gpc */
zcull->pixel_squares_by_aliquots =
nvgpu_gr_config_get_zcb_count(gr->config) * 16U * 16U *
nvgpu_gr_config_get_tpc_count(gr->config) /
(nvgpu_gr_config_get_gpc_count(gr->config) *
nvgpu_gr_config_get_gpc_tpc_count(gr->config, 0U));
zcull->total_aliquots =
gr_gpc0_zcull_total_ram_size_num_aliquots_f(
gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r()));
return 0;
}
u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
{
/* assuming gr has already been initialized */
return gr->ctx_vars.zcull_ctxsw_image_size;
}
int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
struct channel_gk20a *c, u64 zcull_va, u32 mode)
{
struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
tsg = tsg_gk20a_from_ch(c);
if (tsg == NULL) {
return -EINVAL;
}
gr_ctx = tsg->gr_ctx;
nvgpu_gr_ctx_set_zcull_ctx(g, gr_ctx, mode, zcull_va);
/* TBD: don't disable channel in sw method processing */
return gr_gk20a_ctx_zcull_setup(g, c, gr_ctx);
}
int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
struct gr_zcull_info *zcull_params)
{
struct gr_zcull_gk20a *zcull = &gr->zcull;
zcull_params->width_align_pixels = zcull->width_align_pixels;
zcull_params->height_align_pixels = zcull->height_align_pixels;
zcull_params->pixel_squares_by_aliquots =
zcull->pixel_squares_by_aliquots;
zcull_params->aliquot_total = zcull->total_aliquots;
zcull_params->region_byte_multiplier =
nvgpu_gr_config_get_gpc_count(gr->config) *
gr_zcull_bytes_per_aliquot_per_gpu_v();
zcull_params->region_header_size =
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) *
gr_zcull_save_restore_header_bytes_per_gpc_v();
zcull_params->subregion_header_size =
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) *
gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
zcull_params->subregion_width_align_pixels =
nvgpu_gr_config_get_tpc_count(gr->config) *
gr_gpc0_zcull_zcsize_width_subregion__multiple_v();
zcull_params->subregion_height_align_pixels =
gr_gpc0_zcull_zcsize_height_subregion__multiple_v();
zcull_params->subregion_count = gr_zcull_subregion_qty_v();
return 0;
}
void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
u32 *zcull_map_tiles)
{
u32 val;
nvgpu_log_fn(g, " ");
if (zcull_num_entries >= 8U) {
nvgpu_log_fn(g, "map0");
val =
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(
zcull_map_tiles[0]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(
zcull_map_tiles[1]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(
zcull_map_tiles[2]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(
zcull_map_tiles[3]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(
zcull_map_tiles[4]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(
zcull_map_tiles[5]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(
zcull_map_tiles[6]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(
zcull_map_tiles[7]);
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), val);
}
if (zcull_num_entries >= 16U) {
nvgpu_log_fn(g, "map1");
val =
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(
zcull_map_tiles[8]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(
zcull_map_tiles[9]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(
zcull_map_tiles[10]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(
zcull_map_tiles[11]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(
zcull_map_tiles[12]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(
zcull_map_tiles[13]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(
zcull_map_tiles[14]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(
zcull_map_tiles[15]);
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(), val);
}
if (zcull_num_entries >= 24U) {
nvgpu_log_fn(g, "map2");
val =
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(
zcull_map_tiles[16]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(
zcull_map_tiles[17]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(
zcull_map_tiles[18]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(
zcull_map_tiles[19]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(
zcull_map_tiles[20]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(
zcull_map_tiles[21]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(
zcull_map_tiles[22]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(
zcull_map_tiles[23]);
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(), val);
}
if (zcull_num_entries >= 32U) {
nvgpu_log_fn(g, "map3");
val =
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(
zcull_map_tiles[24]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(
zcull_map_tiles[25]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(
zcull_map_tiles[26]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(
zcull_map_tiles[27]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(
zcull_map_tiles[28]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(
zcull_map_tiles[29]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(
zcull_map_tiles[30]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(
zcull_map_tiles[31]);
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(), val);
}
}
static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
{
u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
u32 *zcull_map_tiles, *zcull_bank_counters;
u32 map_counter;
u32 rcp_conserv;
u32 offset;
bool floorsweep = false;
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
GPU_LIT_NUM_TPC_PER_GPC);
u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc;
u32 map_tile_count;
if (gr->config->map_tiles == NULL) {
return -1;
}
if (zcull_alloc_num % 8U != 0U) {
/* Total 8 fields per map reg i.e. tile_0 to tile_7*/
zcull_alloc_num += (zcull_alloc_num % 8U);
}
zcull_map_tiles = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
if (zcull_map_tiles == NULL) {
nvgpu_err(g,
"failed to allocate zcull map titles");
return -ENOMEM;
}
zcull_bank_counters = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
if (zcull_bank_counters == NULL) {
nvgpu_err(g,
"failed to allocate zcull bank counters");
nvgpu_kfree(g, zcull_map_tiles);
return -ENOMEM;
}
for (map_counter = 0;
map_counter < nvgpu_gr_config_get_tpc_count(gr->config);
map_counter++) {
map_tile_count = nvgpu_gr_config_get_map_tile_count(gr->config, map_counter);
zcull_map_tiles[map_counter] =
zcull_bank_counters[map_tile_count];
zcull_bank_counters[map_tile_count]++;
}
if (g->ops.gr.program_zcull_mapping != NULL) {
g->ops.gr.program_zcull_mapping(g, zcull_alloc_num,
zcull_map_tiles);
}
nvgpu_kfree(g, zcull_map_tiles);
nvgpu_kfree(g, zcull_bank_counters);
for (gpc_index = 0;
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_index++) {
gpc_tpc_count = nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_index);
gpc_zcull_count = nvgpu_gr_config_get_gpc_zcb_count(gr->config, gpc_index);
if (gpc_zcull_count !=
nvgpu_gr_config_get_max_zcull_per_gpc_count(gr->config) &&
gpc_zcull_count < gpc_tpc_count) {
nvgpu_err(g,
"zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
gpc_zcull_count, gpc_tpc_count, gpc_index);
return -EINVAL;
}
if (gpc_zcull_count !=
nvgpu_gr_config_get_max_zcull_per_gpc_count(gr->config) &&
gpc_zcull_count != 0U) {
floorsweep = true;
}
}
/* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */
rcp_conserv = DIV_ROUND_UP(gr_gpc0_zcull_sm_num_rcp_conservative__max_v(),
nvgpu_gr_config_get_gpc_tpc_count(gr->config, 0U));
for (gpc_index = 0;
gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_index++) {
offset = gpc_index * gpc_stride;
if (floorsweep) {
gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
gr_gpc0_zcull_ram_addr_row_offset_f(
nvgpu_gr_config_get_map_row_offset(gr->config)) |
gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
nvgpu_gr_config_get_max_zcull_per_gpc_count(gr->config)));
} else {
gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
gr_gpc0_zcull_ram_addr_row_offset_f(
nvgpu_gr_config_get_map_row_offset(gr->config)) |
gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_index)));
}
gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset,
gr_gpc0_zcull_fs_num_active_banks_f(
nvgpu_gr_config_get_gpc_zcb_count(gr->config, gpc_index)) |
gr_gpc0_zcull_fs_num_sms_f(
nvgpu_gr_config_get_tpc_count(gr->config)));
gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
}
gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));
return 0;
}
void gr_gk20a_enable_hww_exceptions(struct gk20a *g)
{
/* enable exceptions */
@@ -2352,7 +2012,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f());
gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data);
gr_gk20a_zcull_init_hw(g, gr);
nvgpu_gr_zcull_init_hw(g, gr->zcull, gr->config);
if (g->ops.priv_ring.set_ppriv_timeout_settings != NULL) {
g->ops.priv_ring.set_ppriv_timeout_settings(g);
@@ -2595,7 +2255,7 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
goto clean_up;
}
err = gr_gk20a_init_zcull(g, gr);
err = nvgpu_gr_zcull_init(g, &gr->zcull);
if (err != 0) {
goto clean_up;
}

View File

@@ -109,30 +109,6 @@ struct gr_channel_map_tlb_entry {
u32 tsgid;
};
struct gr_zcull_gk20a {
u32 aliquot_width;
u32 aliquot_height;
u32 aliquot_size;
u32 total_aliquots;
u32 width_align_pixels;
u32 height_align_pixels;
u32 pixel_squares_by_aliquots;
};
struct gr_zcull_info {
u32 width_align_pixels;
u32 height_align_pixels;
u32 pixel_squares_by_aliquots;
u32 aliquot_total;
u32 region_byte_multiplier;
u32 region_header_size;
u32 subregion_header_size;
u32 subregion_width_align_pixels;
u32 subregion_height_align_pixels;
u32 subregion_count;
};
struct sm_info {
u32 gpc_index;
u32 tpc_index;
@@ -176,8 +152,6 @@ struct gr_gk20a {
bool golden_image_initialized;
u32 golden_image_size;
u32 zcull_ctxsw_image_size;
u32 pm_ctxsw_image_size;
u32 buffer_header_size;
@@ -213,7 +187,7 @@ struct gr_gk20a {
struct nvgpu_gr_hwpm_map *hwpm_map;
struct gr_zcull_gk20a zcull;
struct nvgpu_gr_zcull *zcull;
struct nvgpu_gr_zbc *zbc;
@@ -344,14 +318,6 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
int gk20a_gr_isr(struct gk20a *g);
u32 gk20a_gr_nonstall_isr(struct gk20a *g);
/* zcull */
u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr);
int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
struct channel_gk20a *c, u64 zcull_va, u32 mode);
int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
struct gr_zcull_info *zcull_params);
void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
u32 *zcull_map_tiles);
/* pmu */
int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,

View File

@@ -37,6 +37,7 @@
#include <nvgpu/fuse.h>
#include <nvgpu/regops.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include "hal/bus/bus_gm20b.h"
#include "hal/bus/bus_gk20a.h"
@@ -53,6 +54,7 @@
#include "hal/fifo/pbdma_status_gm20b.h"
#include "hal/fifo/userd_gk20a.h"
#include "hal/gr/zbc/zbc_gm20b.h"
#include "hal/gr/zcull/zcull_gm20b.h"
#include "hal/gr/init/gr_init_gm20b.h"
#include "hal/gr/intr/gr_intr_gm20b.h"
#include "hal/falcon/falcon_gk20a.h"
@@ -252,8 +254,6 @@ static const struct gpu_ops gm20b_ops = {
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gm20b_detect_sm_arch,
@@ -284,7 +284,6 @@ static const struct gpu_ops gm20b_ops = {
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags,
.init_sm_id_table = gr_gk20a_init_sm_id_table,
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
.commit_inst = gr_gk20a_commit_inst,
.trigger_suspend = gr_gk20a_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -408,6 +407,12 @@ static const struct gpu_ops gm20b_ops = {
.get_gpcs_swdx_dss_zbc_c_format_reg = NULL,
.get_gpcs_swdx_dss_zbc_z_format_reg = NULL,
},
.zcull = {
.init_zcull_hw = gm20b_gr_init_zcull_hw,
.bind_ctxsw_zcull = gm20b_gr_bind_ctxsw_zcull,
.get_zcull_info = gm20b_gr_get_zcull_info,
.program_zcull_mapping = gm20b_gr_program_zcull_mapping,
},
.init = {
.fifo_access = gm20b_gr_init_fifo_access,
.get_access_map = gm20b_gr_init_get_access_map,

View File

@@ -37,6 +37,7 @@
#include <nvgpu/fuse.h>
#include <nvgpu/regops.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/fecs_trace.h>
#include "hal/bus/bus_gk20a.h"
@@ -64,6 +65,7 @@
#include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
#include "hal/gr/fecs_trace/fecs_trace_gp10b.h"
#include "hal/gr/zbc/zbc_gp10b.h"
#include "hal/gr/zcull/zcull_gm20b.h"
#include "hal/gr/init/gr_init_gm20b.h"
#include "hal/gr/init/gr_init_gp10b.h"
#include "hal/gr/intr/gr_intr_gm20b.h"
@@ -275,8 +277,6 @@ static const struct gpu_ops gp10b_ops = {
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gm20b_detect_sm_arch,
@@ -307,7 +307,6 @@ static const struct gpu_ops gp10b_ops = {
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = gr_gk20a_init_sm_id_table,
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
.commit_inst = gr_gk20a_commit_inst,
.trigger_suspend = gr_gk20a_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -479,6 +478,12 @@ static const struct gpu_ops gp10b_ops = {
.get_gpcs_swdx_dss_zbc_z_format_reg =
gp10b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg,
},
.zcull = {
.init_zcull_hw = gm20b_gr_init_zcull_hw,
.bind_ctxsw_zcull = gm20b_gr_bind_ctxsw_zcull,
.get_zcull_info = gm20b_gr_get_zcull_info,
.program_zcull_mapping = gm20b_gr_program_zcull_mapping,
},
.init = {
.fifo_access = gm20b_gr_init_fifo_access,
.get_access_map = gp10b_gr_init_get_access_map,

View File

@@ -62,6 +62,8 @@
#include "hal/gr/init/gr_init_gv11b.h"
#include "hal/gr/intr/gr_intr_gm20b.h"
#include "hal/gr/intr/gr_intr_gv11b.h"
#include "hal/gr/zcull/zcull_gm20b.h"
#include "hal/gr/zcull/zcull_gv11b.h"
#include "hal/gr/hwpm_map/hwpm_map_gv100.h"
#include "hal/falcon/falcon_gk20a.h"
#include "hal/gsp/gsp_gv100.h"
@@ -154,6 +156,7 @@
#include <nvgpu/cyclestats_snapshot.h>
#include <nvgpu/regops.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/hw/gv100/hw_proj_gv100.h>
@@ -384,8 +387,6 @@ static const struct gpu_ops gv100_ops = {
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gv11b_detect_sm_arch,
@@ -419,7 +420,6 @@ static const struct gpu_ops gv100_ops = {
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = gr_gv100_init_sm_id_table,
.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
.commit_inst = gr_gv11b_commit_inst,
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -614,6 +614,12 @@ static const struct gpu_ops gv100_ops = {
.get_gpcs_swdx_dss_zbc_z_format_reg =
gv11b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg,
},
.zcull = {
.init_zcull_hw = gm20b_gr_init_zcull_hw,
.bind_ctxsw_zcull = gm20b_gr_bind_ctxsw_zcull,
.get_zcull_info = gm20b_gr_get_zcull_info,
.program_zcull_mapping = gv11b_gr_program_zcull_mapping,
},
.hwpm_map = {
.align_regs_perf_pma =
gv100_gr_hwpm_map_align_regs_perf_pma,

View File

@@ -2269,36 +2269,6 @@ int gr_gv11b_init_sw_veid_bundle(struct gk20a *g)
return err;
}
void gr_gv11b_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
u32 *zcull_map_tiles)
{
u32 val, i, j;
nvgpu_log_fn(g, " ");
for (i = 0U, j = 0U; i < (zcull_num_entries / 8U); i++, j += 8U) {
val =
gr_gpcs_zcull_sm_in_gpc_number_map_tile_0_f(
zcull_map_tiles[j+0U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_1_f(
zcull_map_tiles[j+1U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_2_f(
zcull_map_tiles[j+2U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_3_f(
zcull_map_tiles[j+3U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_4_f(
zcull_map_tiles[j+4U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_5_f(
zcull_map_tiles[j+5U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_6_f(
zcull_map_tiles[j+6U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_7_f(
zcull_map_tiles[j+7U]);
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map_r(i), val);
}
}
void gr_gv11b_detect_sm_arch(struct gk20a *g)
{
u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());

View File

@@ -71,9 +71,6 @@ struct gk20a_debug_output;
#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE U32(0)
/*zcull*/
void gr_gv11b_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
u32 *zcull_map_tiles);
void gr_gv11b_create_sysfs(struct gk20a *g);
void gr_gv11b_remove_sysfs(struct gk20a *g);
u32 gr_gv11b_ctxsw_checksum_mismatch_mailbox_val(void);

View File

@@ -57,6 +57,8 @@
#include "hal/gr/fecs_trace/fecs_trace_gv11b.h"
#include "hal/gr/zbc/zbc_gp10b.h"
#include "hal/gr/zbc/zbc_gv11b.h"
#include "hal/gr/zcull/zcull_gm20b.h"
#include "hal/gr/zcull/zcull_gv11b.h"
#include "hal/gr/init/gr_init_gm20b.h"
#include "hal/gr/init/gr_init_gp10b.h"
#include "hal/gr/init/gr_init_gv11b.h"
@@ -128,6 +130,7 @@
#include <nvgpu/perfbuf.h>
#include <nvgpu/cyclestats_snapshot.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/hw/gv11b/hw_proj_gv11b.h>
@@ -334,8 +337,6 @@ static const struct gpu_ops gv11b_ops = {
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gv11b_detect_sm_arch,
@@ -370,7 +371,6 @@ static const struct gpu_ops gv11b_ops = {
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = gr_gv100_init_sm_id_table,
.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
.commit_inst = gr_gv11b_commit_inst,
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -574,6 +574,12 @@ static const struct gpu_ops gv11b_ops = {
.get_gpcs_swdx_dss_zbc_z_format_reg =
gv11b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg,
},
.zcull = {
.init_zcull_hw = gm20b_gr_init_zcull_hw,
.bind_ctxsw_zcull = gm20b_gr_bind_ctxsw_zcull,
.get_zcull_info = gm20b_gr_get_zcull_info,
.program_zcull_mapping = gv11b_gr_program_zcull_mapping,
},
.hwpm_map = {
.align_regs_perf_pma =
gv100_gr_hwpm_map_align_regs_perf_pma,

View File

@@ -0,0 +1,328 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/io.h>
#include <nvgpu/channel.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/zcull.h>
#include "zcull_gm20b.h"
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
static int gm20b_gr_init_zcull_ctxsw_image_size(struct gk20a *g,
struct nvgpu_gr_zcull *gr_zcull)
{
int ret = 0;
struct fecs_method_op_gk20a op = {
.mailbox = { .id = 0U, .data = 0U,
.clr = ~U32(0U), .ok = 0U, .fail = 0U},
.method.data = 0U,
.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
.cond.fail = GR_IS_UCODE_OP_SKIP,
};
if (!g->gr.ctx_vars.golden_image_initialized) {
op.method.addr =
gr_fecs_method_push_adr_discover_zcull_image_size_v();
op.mailbox.ret = &gr_zcull->zcull_ctxsw_image_size;
ret = gr_gk20a_submit_fecs_method_op(g, op, false);
if (ret != 0) {
nvgpu_err(g,
"query zcull ctx image size failed");
return ret;
}
}
return ret;
}
int gm20b_gr_init_zcull_hw(struct gk20a *g,
struct nvgpu_gr_zcull *gr_zcull,
struct nvgpu_gr_config *gr_config)
{
u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
bool floorsweep = false;
u32 rcp_conserv;
u32 offset;
int ret;
gr_zcull->total_aliquots =
gr_gpc0_zcull_total_ram_size_num_aliquots_f(
nvgpu_readl(g, gr_gpc0_zcull_total_ram_size_r()));
ret = gm20b_gr_init_zcull_ctxsw_image_size(g, gr_zcull);
if (ret != 0) {
return ret;
}
for (gpc_index = 0;
gpc_index < nvgpu_gr_config_get_gpc_count(gr_config);
gpc_index++) {
gpc_tpc_count =
nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_index);
gpc_zcull_count =
nvgpu_gr_config_get_gpc_zcb_count(gr_config, gpc_index);
if (gpc_zcull_count !=
nvgpu_gr_config_get_max_zcull_per_gpc_count(gr_config) &&
gpc_zcull_count < gpc_tpc_count) {
nvgpu_err(g,
"zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
gpc_zcull_count, gpc_tpc_count, gpc_index);
return -EINVAL;
}
if (gpc_zcull_count !=
nvgpu_gr_config_get_max_zcull_per_gpc_count(gr_config) &&
gpc_zcull_count != 0U) {
floorsweep = true;
}
}
/* ceil(1.0f / SM_NUM * gr_gpc0_zcull_sm_num_rcp_conservative__max_v()) */
rcp_conserv = DIV_ROUND_UP(gr_gpc0_zcull_sm_num_rcp_conservative__max_v(),
nvgpu_gr_config_get_gpc_tpc_count(gr_config, 0U));
for (gpc_index = 0;
gpc_index < nvgpu_gr_config_get_gpc_count(gr_config);
gpc_index++) {
offset = gpc_index * gpc_stride;
if (floorsweep) {
nvgpu_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
gr_gpc0_zcull_ram_addr_row_offset_f(
nvgpu_gr_config_get_map_row_offset(gr_config)) |
gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
nvgpu_gr_config_get_max_zcull_per_gpc_count(gr_config)));
} else {
nvgpu_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
gr_gpc0_zcull_ram_addr_row_offset_f(
nvgpu_gr_config_get_map_row_offset(gr_config)) |
gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_index)));
}
nvgpu_writel(g, gr_gpc0_zcull_fs_r() + offset,
gr_gpc0_zcull_fs_num_active_banks_f(
nvgpu_gr_config_get_gpc_zcb_count(gr_config, gpc_index)) |
gr_gpc0_zcull_fs_num_sms_f(
nvgpu_gr_config_get_tpc_count(gr_config)));
nvgpu_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
}
nvgpu_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));
return 0;
}
int gm20b_gr_get_zcull_info(struct gk20a *g,
struct nvgpu_gr_config *gr_config,
struct nvgpu_gr_zcull *zcull,
struct nvgpu_gr_zcull_info *zcull_params)
{
zcull_params->width_align_pixels = zcull->width_align_pixels;
zcull_params->height_align_pixels = zcull->height_align_pixels;
zcull_params->pixel_squares_by_aliquots =
zcull->pixel_squares_by_aliquots;
zcull_params->aliquot_total = zcull->total_aliquots;
zcull_params->region_byte_multiplier =
nvgpu_gr_config_get_gpc_count(gr_config) *
gr_zcull_bytes_per_aliquot_per_gpu_v();
zcull_params->region_header_size =
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) *
gr_zcull_save_restore_header_bytes_per_gpc_v();
zcull_params->subregion_header_size =
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) *
gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
zcull_params->subregion_width_align_pixels =
nvgpu_gr_config_get_tpc_count(gr_config) *
gr_gpc0_zcull_zcsize_width_subregion__multiple_v();
zcull_params->subregion_height_align_pixels =
gr_gpc0_zcull_zcsize_height_subregion__multiple_v();
zcull_params->subregion_count = gr_zcull_subregion_qty_v();
return 0;
}
void gm20b_gr_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
u32 *zcull_map_tiles)
{
u32 val;
nvgpu_log_fn(g, " ");
if (zcull_num_entries >= 8U) {
nvgpu_log_fn(g, "map0");
val =
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(
zcull_map_tiles[0]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(
zcull_map_tiles[1]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(
zcull_map_tiles[2]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(
zcull_map_tiles[3]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(
zcull_map_tiles[4]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(
zcull_map_tiles[5]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(
zcull_map_tiles[6]) |
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(
zcull_map_tiles[7]);
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), val);
}
if (zcull_num_entries >= 16U) {
nvgpu_log_fn(g, "map1");
val =
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(
zcull_map_tiles[8]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(
zcull_map_tiles[9]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(
zcull_map_tiles[10]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(
zcull_map_tiles[11]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(
zcull_map_tiles[12]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(
zcull_map_tiles[13]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(
zcull_map_tiles[14]) |
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(
zcull_map_tiles[15]);
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(), val);
}
if (zcull_num_entries >= 24U) {
nvgpu_log_fn(g, "map2");
val =
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(
zcull_map_tiles[16]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(
zcull_map_tiles[17]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(
zcull_map_tiles[18]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(
zcull_map_tiles[19]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(
zcull_map_tiles[20]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(
zcull_map_tiles[21]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(
zcull_map_tiles[22]) |
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(
zcull_map_tiles[23]);
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(), val);
}
if (zcull_num_entries >= 32U) {
nvgpu_log_fn(g, "map3");
val =
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(
zcull_map_tiles[24]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(
zcull_map_tiles[25]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(
zcull_map_tiles[26]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(
zcull_map_tiles[27]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(
zcull_map_tiles[28]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(
zcull_map_tiles[29]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(
zcull_map_tiles[30]) |
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(
zcull_map_tiles[31]);
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(), val);
}
}
static int gm20b_gr_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
struct nvgpu_gr_ctx *gr_ctx)
{
int ret = 0;
nvgpu_log_fn(g, " ");
ret = gk20a_disable_channel_tsg(g, c);
if (ret != 0) {
nvgpu_err(g, "failed to disable channel/TSG");
return ret;
}
ret = gk20a_fifo_preempt(g, c);
if (ret != 0) {
if (gk20a_enable_channel_tsg(g, c) != 0) {
nvgpu_err(g, "failed to re-enable channel/TSG");
}
nvgpu_err(g, "failed to preempt channel/TSG");
return ret;
}
ret = nvgpu_channel_gr_zcull_setup(g, c, gr_ctx);
if (ret != 0) {
nvgpu_err(g, "failed to set up zcull");
}
ret = gk20a_enable_channel_tsg(g, c);
if (ret != 0) {
nvgpu_err(g, "failed to enable channel/TSG");
}
return ret;
}
int gm20b_gr_bind_ctxsw_zcull(struct gk20a *g, struct channel_gk20a *c,
u64 zcull_va, u32 mode)
{
struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
tsg = tsg_gk20a_from_ch(c);
if (tsg == NULL) {
return -EINVAL;
}
gr_ctx = tsg->gr_ctx;
nvgpu_gr_ctx_set_zcull_ctx(g, gr_ctx, mode, zcull_va);
/* TBD: don't disable channel in sw method processing */
return gm20b_gr_ctx_zcull_setup(g, c, gr_ctx);
}

View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_ZCULL_GM20B_H
#define NVGPU_GR_ZCULL_GM20B_H
#include <nvgpu/types.h>
struct gk20a;
struct nvgpu_gr_config;
struct nvgpu_gr_zcull;
struct nvgpu_gr_zcull_info;
struct channel_gk20a;
int gm20b_gr_init_zcull_hw(struct gk20a *g,
struct nvgpu_gr_zcull *gr_zcull,
struct nvgpu_gr_config *gr_config);
int gm20b_gr_get_zcull_info(struct gk20a *g,
struct nvgpu_gr_config *gr_config,
struct nvgpu_gr_zcull *zcull,
struct nvgpu_gr_zcull_info *zcull_params);
void gm20b_gr_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
u32 *zcull_map_tiles);
int gm20b_gr_bind_ctxsw_zcull(struct gk20a *g, struct channel_gk20a *c,
u64 zcull_va, u32 mode);
#endif /* NVGPU_GR_ZCULL_GM20B_H */

View File

@@ -0,0 +1,62 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/io.h>
#include <nvgpu/gr/zcull.h>
#include "zcull_gv11b.h"
#include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
void gv11b_gr_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
u32 *zcull_map_tiles)
{
u32 val, i, j = 0U;
nvgpu_log_fn(g, " ");
for (i = 0U; i < (zcull_num_entries / 8U); i++) {
val =
gr_gpcs_zcull_sm_in_gpc_number_map_tile_0_f(
zcull_map_tiles[j+0U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_1_f(
zcull_map_tiles[j+1U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_2_f(
zcull_map_tiles[j+2U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_3_f(
zcull_map_tiles[j+3U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_4_f(
zcull_map_tiles[j+4U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_5_f(
zcull_map_tiles[j+5U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_6_f(
zcull_map_tiles[j+6U]) |
gr_gpcs_zcull_sm_in_gpc_number_map_tile_7_f(
zcull_map_tiles[j+7U]);
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map_r(i), val);
j += 8U;
}
}

View File

@@ -0,0 +1,33 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_ZCULL_GV11B_H
#define NVGPU_GR_ZCULL_GV11B_H
#include <nvgpu/types.h>
struct gk20a;
void gv11b_gr_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
u32 *zcull_map_tiles);
#endif /* NVGPU_GR_ZCULL_GV11B_H */

View File

@@ -38,6 +38,7 @@ struct fifo_profile_gk20a;
struct nvgpu_channel_sync;
struct nvgpu_gpfifo_userdata;
struct nvgpu_gr_subctx;
struct nvgpu_gr_ctx;
/* Flags to be passed to nvgpu_channel_setup_bind() */
#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR BIT32(0)
@@ -474,6 +475,8 @@ int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
u32 flags,
struct nvgpu_channel_fence *fence,
struct gk20a_fence **fence_out);
int nvgpu_channel_gr_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
struct nvgpu_gr_ctx *gr_ctx);
#ifdef CONFIG_DEBUG_FS
void trace_write_pushbuffers(struct channel_gk20a *c, u32 count);

View File

@@ -68,6 +68,7 @@ struct nvgpu_gr_subctx;
struct nvgpu_gr_zbc;
struct nvgpu_gr_zbc_entry;
struct nvgpu_gr_zbc_query_params;
struct nvgpu_gr_zcull_info;
struct nvgpu_channel_hw_state;
struct nvgpu_engine_status_info;
struct nvgpu_pbdma_status_info;
@@ -298,11 +299,6 @@ struct gpu_ops {
void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
int (*alloc_obj_ctx)(struct channel_gk20a *c,
u32 class_num, u32 flags);
int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr,
struct channel_gk20a *c, u64 zcull_va,
u32 mode);
int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr,
struct gr_zcull_info *zcull_params);
int (*decode_egpc_addr)(struct gk20a *g,
u32 addr, enum ctxsw_addr_type *addr_type,
u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags);
@@ -419,8 +415,6 @@ struct gpu_ops {
int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost);
int (*init_sm_id_table)(struct gk20a *g);
int (*init_sw_veid_bundle)(struct gk20a *g);
void (*program_zcull_mapping)(struct gk20a *g,
u32 zcull_alloc_num, u32 *zcull_map_tiles);
int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
int (*trigger_suspend)(struct gk20a *g);
int (*wait_for_pause)(struct gk20a *g, struct nvgpu_warpstate *w_state);
@@ -645,6 +639,23 @@ struct gpu_ops {
struct gk20a *g);
} zbc;
struct {
int (*init_zcull_hw)(struct gk20a *g,
struct nvgpu_gr_zcull *gr_zcull,
struct nvgpu_gr_config *gr_config);
int (*bind_ctxsw_zcull)(struct gk20a *g,
struct channel_gk20a *c,
u64 zcull_va,
u32 mode);
int (*get_zcull_info)(struct gk20a *g,
struct nvgpu_gr_config *gr_config,
struct nvgpu_gr_zcull *gr_zcull,
struct nvgpu_gr_zcull_info *zcull_params);
void (*program_zcull_mapping)(struct gk20a *g,
u32 zcull_alloc_num,
u32 *zcull_map_tiles);
} zcull;
struct {
void (*align_regs_perf_pma)(u32 *offset);
u32 (*get_active_fbpa_mask)(struct gk20a *g);

View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_ZCULL_H
#define NVGPU_GR_ZCULL_H
#include <nvgpu/types.h>
struct gk20a;
struct nvgpu_gr_config;
struct nvgpu_gr_zcull {
struct gk20a *g;
u32 aliquot_width;
u32 aliquot_height;
u32 aliquot_size;
u32 total_aliquots;
u32 width_align_pixels;
u32 height_align_pixels;
u32 pixel_squares_by_aliquots;
u32 zcull_ctxsw_image_size;
};
struct nvgpu_gr_zcull_info {
u32 width_align_pixels;
u32 height_align_pixels;
u32 pixel_squares_by_aliquots;
u32 aliquot_total;
u32 region_byte_multiplier;
u32 region_header_size;
u32 subregion_header_size;
u32 subregion_width_align_pixels;
u32 subregion_height_align_pixels;
u32 subregion_count;
};
int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull);
void nvgpu_gr_zcull_deinit(struct gk20a *g, struct nvgpu_gr_zcull *gr_zcull);
u32 nvgpu_gr_get_ctxsw_zcull_size(struct gk20a *g,
struct nvgpu_gr_zcull *gr_zcull);
int nvgpu_gr_zcull_init_hw(struct gk20a *g,
struct nvgpu_gr_zcull *gr_zcull,
struct nvgpu_gr_config *gr_config);
#endif /* NVGPU_GR_ZCULL_H */

View File

@@ -753,7 +753,7 @@ static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
nvgpu_log_fn(gr->g, " ");
return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
return g->ops.gr.zcull.bind_ctxsw_zcull(g, ch,
args->gpu_va, args->mode);
}

View File

@@ -34,6 +34,7 @@
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/channel.h>
#include <nvgpu/pmu/pmgr.h>
#include <nvgpu/power_features/pg.h>
@@ -1632,7 +1633,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
struct nvgpu_gpu_zbc_set_table_args *set_table_args;
struct nvgpu_gpu_zbc_query_table_args *query_table_args;
u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE];
struct gr_zcull_info *zcull_info;
struct nvgpu_gr_zcull_info *zcull_info;
struct nvgpu_gr_zbc_entry *zbc_val;
struct nvgpu_gr_zbc_query_params *zbc_tbl;
int err = 0;
@@ -1665,7 +1666,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf;
get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr);
get_ctx_size_args->size = nvgpu_gr_get_ctxsw_zcull_size(g, g->gr.zcull);
break;
case NVGPU_GPU_IOCTL_ZCULL_GET_INFO:
@@ -1674,11 +1675,12 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
(void) memset(get_info_args, 0,
sizeof(struct nvgpu_gpu_zcull_get_info_args));
zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info));
zcull_info = nvgpu_kzalloc(g, sizeof(*zcull_info));
if (zcull_info == NULL)
return -ENOMEM;
err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info);
err = g->ops.gr.zcull.get_zcull_info(g, g->gr.config,
g->gr.zcull, zcull_info);
if (err) {
nvgpu_kfree(g, zcull_info);
break;

View File

@@ -60,6 +60,8 @@
#include "hal/gr/fecs_trace/fecs_trace_gv11b.h"
#include "hal/gr/zbc/zbc_gp10b.h"
#include "hal/gr/zbc/zbc_gv11b.h"
#include "hal/gr/zcull/zcull_gm20b.h"
#include "hal/gr/zcull/zcull_gv11b.h"
#include "hal/gr/init/gr_init_gm20b.h"
#include "hal/gr/init/gr_init_gp10b.h"
#include "hal/gr/init/gr_init_gv11b.h"
@@ -405,8 +407,6 @@ static const struct gpu_ops tu104_ops = {
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
.get_tpc_num = gr_gm20b_get_tpc_num,
.detect_sm_arch = gr_gv11b_detect_sm_arch,
@@ -440,7 +440,6 @@ static const struct gpu_ops tu104_ops = {
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = gr_gv100_init_sm_id_table,
.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
.commit_inst = gr_gv11b_commit_inst,
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -643,6 +642,12 @@ static const struct gpu_ops tu104_ops = {
.get_gpcs_swdx_dss_zbc_z_format_reg =
gv11b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg,
},
.zcull = {
.init_zcull_hw = gm20b_gr_init_zcull_hw,
.bind_ctxsw_zcull = gm20b_gr_bind_ctxsw_zcull,
.get_zcull_info = gm20b_gr_get_zcull_info,
.program_zcull_mapping = gv11b_gr_program_zcull_mapping,
},
.hwpm_map = {
.align_regs_perf_pma =
gv100_gr_hwpm_map_align_regs_perf_pma,