gpu: nvgpu: move global bundle circular buffer commit hal to hal.gr.init

Move the g->ops.gr.commit_global_bundle_cb() hal to the hal.gr.init unit
as g->ops.gr.init.commit_global_bundle_cb().

Remove the register header accessor usage from
gr_gk20a_commit_global_ctx_buffers() and move it into the hal functions.

Move the hal definitions to the gm20b/gp10b hal files as appropriate.

Jira NVGPU-2961

Change-Id: I6358dce963857402aa1d4d5606bf75398b9be83d
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2077216
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Deepak Nibade <dnibade@nvidia.com>
Date:      2019-03-16 15:23:14 +05:30
Committer: mobile promotions
Commit:    e64e02aaef
Parent:    e7047d0151

17 changed files with 113 additions and 99 deletions
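
For orientation, the change is purely a relocation of the function pointer: call sites switch from g->ops.gr.commit_global_bundle_cb() to g->ops.gr.init.commit_global_bundle_cb(). Below is a minimal compilable sketch of the wiring, using stand-in types rather than the real nvgpu headers; gp10b_commit_stub and the reduced structs are hypothetical illustrations, not nvgpu code.

/* Reduced, hypothetical sketch of the op relocation; the real nvgpu
 * gpu_ops structs carry many more members than shown here. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

struct gk20a;
struct nvgpu_gr_ctx { int unused; };

struct gpu_ops {
	struct {
		/* before: void (*commit_global_bundle_cb)(...) sat here */
		struct {
			void (*commit_global_bundle_cb)(struct gk20a *g,
				struct nvgpu_gr_ctx *gr_ctx,
				u64 addr, u64 size, bool patch);
		} init; /* after: the op lives in the init sub-unit */
	} gr;
};

struct gk20a {
	struct gpu_ops ops;
};

/* stand-in for gp10b_gr_init_commit_global_bundle_cb() */
static void gp10b_commit_stub(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
			      u64 addr, u64 size, bool patch)
{
	(void)g; (void)gr_ctx; (void)patch;
	printf("bundle cb: addr=0x%llx size=%llu\n",
	       (unsigned long long)addr, (unsigned long long)size);
}

int main(void)
{
	struct nvgpu_gr_ctx ctx = { 0 };
	struct gk20a g;

	/* chip hal files wire the op into .gr.init ... */
	g.ops.gr.init.commit_global_bundle_cb = gp10b_commit_stub;

	/* ... and callers go through g->ops.gr.init instead of g->ops.gr */
	g.ops.gr.init.commit_global_bundle_cb(&g, &ctx, 0x100000ULL,
					      0x18ULL, true);
	return 0;
}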


@@ -115,7 +115,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.get_patch_slots = gr_gk20a_get_patch_slots,
 		.init_gpc_mmu = NULL,
 		.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
-		.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
 		.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
 		.commit_global_pagepool = gr_gp10b_commit_global_pagepool,
 		.handle_sw_method = NULL,
@@ -344,6 +343,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 			gm20b_gr_init_get_global_ctx_cb_buffer_size,
 		.get_global_ctx_pagepool_buffer_size =
 			gm20b_gr_init_get_global_ctx_pagepool_buffer_size,
+		.commit_global_bundle_cb =
+			gp10b_gr_init_commit_global_bundle_cb,
 	},
 },
 .perf = {


@@ -41,6 +41,7 @@
 #include "hal/fb/fb_gp10b.h"
 #include "hal/fb/fb_gv11b.h"
 #include "hal/gr/init/gr_init_gm20b.h"
+#include "hal/gr/init/gr_init_gp10b.h"
 #include "hal/gr/init/gr_init_gv11b.h"
 #include "common/netlist/netlist_gv11b.h"
@@ -133,7 +134,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 	.gr = {
 		.init_gpc_mmu = NULL,
 		.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
-		.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
 		.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
 		.commit_global_pagepool = gr_gp10b_commit_global_pagepool,
 		.handle_sw_method = NULL,
@@ -392,6 +392,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 			gm20b_gr_init_get_global_ctx_cb_buffer_size,
 		.get_global_ctx_pagepool_buffer_size =
 			gm20b_gr_init_get_global_ctx_pagepool_buffer_size,
+		.commit_global_bundle_cb =
+			gp10b_gr_init_commit_global_bundle_cb,
 	},
 },
 .perf = {


@@ -705,16 +705,10 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, patch);
 
 	/* global bundle cb */
-	addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
-			NVGPU_GR_CTX_CIRCULAR_VA) >>
-		U64(gr_scc_bundle_cb_base_addr_39_8_align_bits_v());
+	addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, NVGPU_GR_CTX_CIRCULAR_VA);
 	size = g->ops.gr.init.get_bundle_cb_default_size(g);
-
-	nvgpu_log_info(g, "bundle cb addr : 0x%016llx, size : %d",
-		addr, size);
 
-	g->ops.gr.commit_global_bundle_cb(g, gr_ctx, addr, size, patch);
+	g->ops.gr.init.commit_global_bundle_cb(g, gr_ctx, addr, size, patch);
 
 	/* global attrib cb */
 	addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,

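The point of this hunk: the caller now passes the unshifted context VA, and the alignment shift happens inside the hal, next to the register writes that consume it. A standalone sketch of that math follows, assuming (not verified here) that gr_scc_bundle_cb_base_addr_39_8_align_bits_v() returns 8, i.e. the base field stores VA bits 39:8 of a 256-byte-aligned address; ALIGN_BITS is a hypothetical stand-in for that accessor.

/* Sketch of the bundle cb base-address alignment now done in the hal.
 * ALIGN_BITS is an assumed stand-in for
 * gr_scc_bundle_cb_base_addr_39_8_align_bits_v(). */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGN_BITS 8u /* field holds addr bits 39:8 -> shift by 8 */

int main(void)
{
	uint64_t va = 0x0000001234567800ULL; /* bundle cb context VA */

	/* low bits dropped by the shift must be zero */
	assert((va & ((1ULL << ALIGN_BITS) - 1)) == 0);

	uint64_t addr_39_8 = va >> ALIGN_BITS;
	printf("addr_39_8 field value = 0x%llx\n",
	       (unsigned long long)addr_39_8);
	return 0;
}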

@@ -95,43 +95,6 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
 		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
 }
 
-void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
-					struct nvgpu_gr_ctx *ch_ctx,
-					u64 addr, u64 size, bool patch)
-{
-	u32 data;
-	u32 bundle_cb_token_limit = g->ops.gr.init.get_bundle_cb_token_limit(g);
-
-	nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
-		gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
-
-	nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
-		gr_scc_bundle_cb_size_div_256b_f(size) |
-		gr_scc_bundle_cb_size_valid_true_f(), patch);
-
-	nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
-		gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
-
-	nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
-		gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
-		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
-
-	/* data for state_limit */
-	data = (g->ops.gr.init.get_bundle_cb_default_size(g) *
-		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
-		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
-
-	data = min_t(u32, data, g->ops.gr.init.get_min_gpm_fifo_depth(g));
-
-	nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d",
-		bundle_cb_token_limit, data);
-
-	nvgpu_gr_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
-		gr_pd_ab_dist_cfg2_token_limit_f(bundle_cb_token_limit) |
-		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
-}
-
 int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 			struct nvgpu_gr_ctx *gr_ctx, bool patch)
 {


@@ -50,9 +50,6 @@ int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask);
 void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data);
 void gm20a_gr_disable_rd_coalesce(struct gk20a *g);
 void gr_gm20b_init_gpc_mmu(struct gk20a *g);
-void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
-					struct nvgpu_gr_ctx *ch_ctx,
-					u64 addr, u64 size, bool patch);
 int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 			struct nvgpu_gr_ctx *gr_ctx, bool patch);
 void gr_gm20b_commit_global_pagepool(struct gk20a *g,


@@ -232,7 +232,6 @@ static const struct gpu_ops gm20b_ops = {
 		.get_patch_slots = gr_gk20a_get_patch_slots,
 		.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
 		.commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb,
-		.commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb,
 		.commit_global_cb_manager = gr_gm20b_commit_global_cb_manager,
 		.commit_global_pagepool = gr_gm20b_commit_global_pagepool,
 		.handle_sw_method = gr_gm20b_handle_sw_method,
@@ -453,6 +452,8 @@ static const struct gpu_ops gm20b_ops = {
 			gm20b_gr_init_get_global_ctx_cb_buffer_size,
 		.get_global_ctx_pagepool_buffer_size =
 			gm20b_gr_init_get_global_ctx_pagepool_buffer_size,
+		.commit_global_bundle_cb =
+			gm20b_gr_init_commit_global_bundle_cb,
 	},
 },
 .fb = {


@@ -1203,44 +1203,6 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
 		gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
 }
 
-void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
-				struct nvgpu_gr_ctx *gr_ctx,
-				u64 addr, u64 size, bool patch)
-{
-	u32 data;
-	u32 bundle_cb_token_limit = g->ops.gr.init.get_bundle_cb_token_limit(g);
-
-	nvgpu_assert(u64_hi32(addr) == 0U);
-	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(),
-		gr_scc_bundle_cb_base_addr_39_8_f((u32)addr), patch);
-
-	nvgpu_assert(size <= U32_MAX);
-	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(),
-		gr_scc_bundle_cb_size_div_256b_f((u32)size) |
-		gr_scc_bundle_cb_size_valid_true_f(), patch);
-
-	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
-		gr_gpcs_swdx_bundle_cb_base_addr_39_8_f((u32)addr), patch);
-
-	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
-		gr_gpcs_swdx_bundle_cb_size_div_256b_f((u32)size) |
-		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
-
-	/* data for state_limit */
-	data = (g->ops.gr.init.get_bundle_cb_default_size(g) *
-		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
-		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
-
-	data = min_t(u32, data, g->ops.gr.init.get_min_gpm_fifo_depth(g));
-
-	nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d",
-		bundle_cb_token_limit, data);
-
-	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(),
-		gr_pd_ab_dist_cfg2_token_limit_f(bundle_cb_token_limit) |
-		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
-}
-
 void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 {
 	nvgpu_tegra_fuse_write_bypass(g, 0x1);


@@ -98,9 +98,6 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
 void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
 			struct nvgpu_gr_ctx *gr_ctx,
 			u64 addr, bool patch);
-void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
-			struct nvgpu_gr_ctx *gr_ctx,
-			u64 addr, u64 size, bool patch);
 void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
 	u32 gpc, u32 tpc, u32 sm, u32 global_esr, u32 warp_esr,


@@ -255,7 +255,6 @@ static const struct gpu_ops gp10b_ops = {
 		.get_patch_slots = gr_gk20a_get_patch_slots,
 		.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
 		.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb,
-		.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
 		.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
 		.commit_global_pagepool = gr_gp10b_commit_global_pagepool,
 		.handle_sw_method = gr_gp10b_handle_sw_method,
@@ -529,6 +528,8 @@ static const struct gpu_ops gp10b_ops = {
 			gm20b_gr_init_get_global_ctx_cb_buffer_size,
 		.get_global_ctx_pagepool_buffer_size =
 			gm20b_gr_init_get_global_ctx_pagepool_buffer_size,
+		.commit_global_bundle_cb =
+			gp10b_gr_init_commit_global_bundle_cb,
 	},
 },
 .fb = {


@@ -363,7 +363,6 @@ static const struct gpu_ops gv100_ops = {
 		.get_patch_slots = gr_gv100_get_patch_slots,
 		.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
 		.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
-		.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
 		.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
 		.commit_global_pagepool = gr_gp10b_commit_global_pagepool,
 		.handle_sw_method = gr_gv11b_handle_sw_method,
@@ -668,6 +667,8 @@ static const struct gpu_ops gv100_ops = {
 			gm20b_gr_init_get_global_ctx_cb_buffer_size,
 		.get_global_ctx_pagepool_buffer_size =
 			gm20b_gr_init_get_global_ctx_pagepool_buffer_size,
+		.commit_global_bundle_cb =
+			gp10b_gr_init_commit_global_bundle_cb,
 	},
 },
 .fb = {


@@ -313,7 +313,6 @@ static const struct gpu_ops gv11b_ops = {
 		.get_patch_slots = gr_gv100_get_patch_slots,
 		.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
 		.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
-		.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
 		.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
 		.commit_global_pagepool = gr_gp10b_commit_global_pagepool,
 		.handle_sw_method = gr_gv11b_handle_sw_method,
@@ -627,6 +626,8 @@ static const struct gpu_ops gv11b_ops = {
 			gm20b_gr_init_get_global_ctx_cb_buffer_size,
 		.get_global_ctx_pagepool_buffer_size =
 			gm20b_gr_init_get_global_ctx_pagepool_buffer_size,
+		.commit_global_bundle_cb =
+			gp10b_gr_init_commit_global_bundle_cb,
 	},
 },
 .fb = {


@@ -22,6 +22,9 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/io.h>
+#include <nvgpu/log.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/engine_status.h>
@@ -752,3 +753,43 @@ u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g)
 		gr_scc_pagepool_total_pages_byte_granularity_v();
 }
+
+void gm20b_gr_init_commit_global_bundle_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, u64 addr, u64 size, bool patch)
+{
+	u32 data;
+	u32 bundle_cb_token_limit = g->ops.gr.init.get_bundle_cb_token_limit(g);
+
+	addr = addr >> U64(gr_scc_bundle_cb_base_addr_39_8_align_bits_v());
+
+	nvgpu_log_info(g, "bundle cb addr : 0x%016llx, size : %llu",
+		addr, size);
+
+	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(),
+		gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
+
+	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(),
+		gr_scc_bundle_cb_size_div_256b_f(size) |
+		gr_scc_bundle_cb_size_valid_true_f(), patch);
+
+	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
+		gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
+
+	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
+		gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
+		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
+
+	/* data for state_limit */
+	data = (g->ops.gr.init.get_bundle_cb_default_size(g) *
+		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
+		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
+
+	data = min_t(u32, data, g->ops.gr.init.get_min_gpm_fifo_depth(g));
+
+	nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d",
+		bundle_cb_token_limit, data);
+
+	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(),
+		gr_pd_ab_dist_cfg2_token_limit_f(bundle_cb_token_limit) |
+		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
+}

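The state_limit arithmetic above converts the default bundle cb size from its div_256b register units into bytes, divides by the cfg2 bundle granularity, and caps the result at the minimum GPM FIFO depth. A worked sketch follows; every constant in it is a hypothetical stand-in for the per-chip _v() accessor or hal getter named in the comment, not a value taken from the hardware headers.

/* Worked sketch of the state_limit computation; the constants are
 * hypothetical stand-ins for per-chip accessors and hal getters. */
#include <stdint.h>
#include <stdio.h>

static uint32_t min_u32(uint32_t a, uint32_t b)
{
	return a < b ? a : b;
}

int main(void)
{
	uint32_t default_size = 0x18;       /* get_bundle_cb_default_size() */
	uint32_t byte_granularity = 256;    /* ..._div_256b_byte_granularity_v() */
	uint32_t bundle_granularity = 32;   /* ..._scc_bundle_granularity_v() */
	uint32_t min_gpm_fifo_depth = 0x62; /* get_min_gpm_fifo_depth() */

	/* 0x18 * 256 / 32 = 0xc0, then capped at the fifo depth 0x62 */
	uint32_t data = (default_size * byte_granularity) / bundle_granularity;
	data = min_u32(data, min_gpm_fifo_depth);

	printf("state_limit = 0x%x\n", data);
	return 0;
}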

@@ -26,6 +26,7 @@
 #include <nvgpu/types.h>
 
 struct gk20a;
+struct nvgpu_gr_ctx;
 struct netlist_av_list;
 struct nvgpu_gr_config;
@@ -68,4 +69,7 @@ u32 gm20b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
 u32 gm20b_gr_init_get_global_ctx_cb_buffer_size(struct gk20a *g);
 u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g);
+void gm20b_gr_init_commit_global_bundle_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, u64 addr, u64 size, bool patch);
+
 #endif /* NVGPU_GR_INIT_GM20B_H */


@@ -22,6 +22,9 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/io.h>
+#include <nvgpu/log.h>
+#include <nvgpu/bug.h>
+#include <nvgpu/gr/ctx.h>
 #include <nvgpu/gr/config.h>
 #include <nvgpu/gr/gr.h>
@@ -287,3 +290,45 @@ u32 gp10b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
 
 	return size;
 }
+
+void gp10b_gr_init_commit_global_bundle_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, u64 addr, u64 size, bool patch)
+{
+	u32 data;
+	u32 bundle_cb_token_limit = g->ops.gr.init.get_bundle_cb_token_limit(g);
+
+	addr = addr >> U64(gr_scc_bundle_cb_base_addr_39_8_align_bits_v());
+
+	nvgpu_log_info(g, "bundle cb addr : 0x%016llx, size : %llu",
+		addr, size);
+
+	nvgpu_assert(u64_hi32(addr) == 0U);
+	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(),
+		gr_scc_bundle_cb_base_addr_39_8_f((u32)addr), patch);
+
+	nvgpu_assert(size <= U32_MAX);
+	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(),
+		gr_scc_bundle_cb_size_div_256b_f((u32)size) |
+		gr_scc_bundle_cb_size_valid_true_f(), patch);
+
+	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
+		gr_gpcs_swdx_bundle_cb_base_addr_39_8_f((u32)addr), patch);
+
+	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
+		gr_gpcs_swdx_bundle_cb_size_div_256b_f((u32)size) |
+		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
+
+	/* data for state_limit */
+	data = (g->ops.gr.init.get_bundle_cb_default_size(g) *
+		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
+		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
+
+	data = min_t(u32, data, g->ops.gr.init.get_min_gpm_fifo_depth(g));
+
+	nvgpu_log_info(g, "bundle cb token limit : %d, state limit : %d",
+		bundle_cb_token_limit, data);
+
+	nvgpu_gr_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(),
+		gr_pd_ab_dist_cfg2_token_limit_f(bundle_cb_token_limit) |
+		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
+}


@@ -46,4 +46,7 @@ u32 gp10b_gr_init_get_alpha_cb_size(struct gk20a *g, u32 tpc_count);
 u32 gp10b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count,
 	u32 max_tpc);
+void gp10b_gr_init_commit_global_bundle_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, u64 addr, u64 size, bool patch);
+
 #endif /* NVGPU_GR_INIT_GP10B_H */


@@ -260,9 +260,6 @@ struct gpu_ops {
 		void (*commit_global_attrib_cb)(struct gk20a *g,
 					struct nvgpu_gr_ctx *ch_ctx,
 					u64 addr, bool patch);
-		void (*commit_global_bundle_cb)(struct gk20a *g,
-					struct nvgpu_gr_ctx *ch_ctx,
-					u64 addr, u64 size, bool patch);
 		int (*commit_global_cb_manager)(struct gk20a *g,
 					struct nvgpu_gr_ctx *gr_ctx,
 					bool patch);
@@ -711,6 +708,9 @@ struct gpu_ops {
 			u32 (*get_global_ctx_cb_buffer_size)(struct gk20a *g);
 			u32 (*get_global_ctx_pagepool_buffer_size)(
 				struct gk20a *g);
+			void (*commit_global_bundle_cb)(struct gk20a *g,
+				struct nvgpu_gr_ctx *ch_ctx, u64 addr, u64 size,
+				bool patch);
 		} init;
 		u32 (*get_ctxsw_checksum_mismatch_mailbox_val)(void);


@@ -383,7 +383,6 @@ static const struct gpu_ops tu104_ops = {
 		.get_patch_slots = gr_gv100_get_patch_slots,
 		.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
 		.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
-		.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb,
 		.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager,
 		.commit_global_pagepool = gr_gp10b_commit_global_pagepool,
 		.handle_sw_method = gr_tu104_handle_sw_method,
@@ -700,6 +699,8 @@ static const struct gpu_ops tu104_ops = {
 			gm20b_gr_init_get_global_ctx_cb_buffer_size,
 		.get_global_ctx_pagepool_buffer_size =
 			gm20b_gr_init_get_global_ctx_pagepool_buffer_size,
+		.commit_global_bundle_cb =
+			gp10b_gr_init_commit_global_bundle_cb,
 	},
 },
 .fb = {