mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: support context regoptype for egpc/etpc
- add is_egpc_addr, is_etpc_addr and get_egpc_etpc_num gr ops
- add gr ops for decode and create egpc/etpc priv addr
- add etpc as part of ctxsw_regs

JIRA GPUT19X-49
Bug 200311674
Bug 1960226

Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Change-Id: I9a8be1804a9354238de2441093b3b136321b7e53
Reviewed-on: https://git-master.nvidia.com/r/1522442
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
92d476bf27
commit
f36e2a234b
@@ -216,8 +216,19 @@ struct gpu_ops {
|
||||
u32 mode);
|
||||
int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr,
|
||||
struct gr_zcull_info *zcull_params);
|
||||
int (*decode_egpc_addr)(struct gk20a *g,
|
||||
u32 addr, int *addr_type,
|
||||
u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags);
|
||||
void (*egpc_etpc_priv_addr_table)(struct gk20a *g, u32 addr,
|
||||
u32 gpc, u32 broadcast_flags, u32 *priv_addr_table,
|
||||
u32 *priv_addr_table_index);
|
||||
bool (*is_tpc_addr)(struct gk20a *g, u32 addr);
|
||||
bool (*is_egpc_addr)(struct gk20a *g, u32 addr);
|
||||
bool (*is_etpc_addr)(struct gk20a *g, u32 addr);
|
||||
void (*get_egpc_etpc_num)(struct gk20a *g, u32 addr,
|
||||
u32 *gpc_num, u32 *tpc_num);
|
||||
u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
|
||||
u32 (*get_egpc_base)(struct gk20a *g);
|
||||
bool (*is_ltcs_ltss_addr)(struct gk20a *g, u32 addr);
|
||||
bool (*is_ltcn_ltss_addr)(struct gk20a *g, u32 addr);
|
||||
bool (*get_lts_in_ltc_shared_base)(void);
|
||||
|
||||
@@ -76,6 +76,8 @@ int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr)
|
||||
&g->gr.ctx_vars.ctxsw_regs.ppc.count);
|
||||
gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ETPC_COUNT", 0,
|
||||
&g->gr.ctx_vars.ctxsw_regs.etpc.count);
|
||||
gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
|
||||
&g->gr.ctx_vars.ctxsw_regs.ppc.count);
|
||||
|
||||
err |= !alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.inst);
|
||||
err |= !alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.data);
|
||||
|
||||
@@ -6236,6 +6236,10 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
} else if (g->ops.gr.is_egpc_addr && g->ops.gr.is_egpc_addr(g, addr)) {
|
||||
return g->ops.gr.decode_egpc_addr(g,
|
||||
addr, addr_type, gpc_num,
|
||||
tpc_num, broadcast_flags);
|
||||
} else {
|
||||
*addr_type = CTXSW_ADDR_TYPE_SYS;
|
||||
return 0;
|
||||
@@ -6331,9 +6335,13 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
|
||||
pri_gpc_addr(g, pri_gpccs_addr_mask(addr),
|
||||
gpc_num);
|
||||
}
|
||||
}
|
||||
|
||||
if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
|
||||
} else if (((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
|
||||
(addr_type == CTXSW_ADDR_TYPE_ETPC)) &&
|
||||
g->ops.gr.egpc_etpc_priv_addr_table) {
|
||||
gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type : EGPC/ETPC");
|
||||
g->ops.gr.egpc_etpc_priv_addr_table(g, addr, gpc_num,
|
||||
broadcast_flags, priv_addr_table, &t);
|
||||
} else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
|
||||
g->ops.gr.split_lts_broadcast_addr(g, addr,
|
||||
priv_addr_table, &t);
|
||||
} else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
|
||||
@@ -6354,8 +6362,8 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
|
||||
pri_tpc_addr(g, pri_tpccs_addr_mask(addr),
|
||||
gpc_num, tpc_num);
|
||||
else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC)
|
||||
err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
|
||||
priv_addr_table, &t);
|
||||
err = gr_gk20a_split_ppc_broadcast_addr(g,
|
||||
addr, gpc_num, priv_addr_table, &t);
|
||||
else
|
||||
priv_addr_table[t++] = addr;
|
||||
}
|
||||
@@ -6382,8 +6390,10 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
|
||||
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
|
||||
|
||||
/* implementation is crossed-up if either of these happen */
|
||||
if (max_offsets > potential_offsets)
|
||||
if (max_offsets > potential_offsets) {
|
||||
gk20a_dbg_fn("max_offsets > potential_offsets");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!g->gr.ctx_vars.golden_image_initialized)
|
||||
return -ENODEV;
|
||||
@@ -6401,6 +6411,8 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
|
||||
gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers);
|
||||
|
||||
if ((max_offsets > 1) && (num_registers > max_offsets)) {
|
||||
gk20a_dbg_fn("max_offsets = %d, num_registers = %d",
|
||||
max_offsets, num_registers);
|
||||
err = -EINVAL;
|
||||
goto cleanup;
|
||||
}
|
||||
@@ -6691,8 +6703,11 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
|
||||
|
||||
gk20a_dbg_info(" gpc = %d tpc = %d",
|
||||
gpc_num, tpc_num);
|
||||
} else
|
||||
} else {
|
||||
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
|
||||
"extended region has tpc reg only");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
|
||||
/* note below is in words/num_registers */
|
||||
@@ -6937,8 +6952,42 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
|
||||
(addr_type == CTXSW_ADDR_TYPE_ETPC)) {
|
||||
if (!(g->ops.gr.get_egpc_base))
|
||||
return -EINVAL;
|
||||
|
||||
for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
|
||||
for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) {
|
||||
reg = &g->gr.ctx_vars.ctxsw_regs.etpc.l[i];
|
||||
address = reg->addr;
|
||||
tpc_addr = pri_tpccs_addr_mask(address);
|
||||
base_address = g->ops.gr.get_egpc_base(g) +
|
||||
(gpc_num * gpc_stride) +
|
||||
tpc_in_gpc_base +
|
||||
(tpc_num * tpc_in_gpc_stride);
|
||||
address = base_address + tpc_addr;
|
||||
/*
|
||||
* The data for the TPCs is interleaved in the context buffer.
|
||||
* Example with num_tpcs = 2
|
||||
* 0 1 2 3 4 5 6 7 8 9 10 11 ...
|
||||
* 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
|
||||
*/
|
||||
tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);
|
||||
|
||||
if (pri_addr == address) {
|
||||
*priv_offset = tpc_offset;
|
||||
nvgpu_log(g,
|
||||
gpu_dbg_fn | gpu_dbg_gpu_dbg,
|
||||
"egpc/etpc priv_offset=0x%#08x",
|
||||
*priv_offset);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Process the PPC segment. */
|
||||
if (addr_type == CTXSW_ADDR_TYPE_PPC) {
|
||||
for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) {
|
||||
@@ -6986,7 +7035,6 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -7046,6 +7094,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
err = gr_gk20a_decode_priv_addr(g, addr, &addr_type,
|
||||
&gpc_num, &tpc_num, &ppc_num, &be_num,
|
||||
&broadcast_flags);
|
||||
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
|
||||
"addr_type = %d, broadcast_flags: %08x",
|
||||
addr_type, broadcast_flags);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@@ -7066,6 +7117,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
}
|
||||
data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
|
||||
sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
|
||||
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "sys_priv_offset=0x%x", sys_priv_offset);
|
||||
|
||||
/* If found in Ext buffer, ok.
|
||||
* If it failed and we expected to find it there (quad offset)
|
||||
@@ -7074,8 +7126,12 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
err = gr_gk20a_find_priv_offset_in_ext_buffer(g,
|
||||
addr, is_quad, quad, context_buffer,
|
||||
context_buffer_size, priv_offset);
|
||||
if (!err || (err && is_quad))
|
||||
if (!err || (err && is_quad)) {
|
||||
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
|
||||
"err = %d, is_quad = %s",
|
||||
err, is_quad ? "true" : false);
|
||||
return err;
|
||||
}
|
||||
|
||||
if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
|
||||
(addr_type == CTXSW_ADDR_TYPE_BE)) {
|
||||
@@ -7116,8 +7172,11 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
err = gr_gk20a_determine_ppc_configuration(g, context,
|
||||
&num_ppcs, &ppc_mask,
|
||||
&reg_list_ppc_count);
|
||||
if (err)
|
||||
if (err) {
|
||||
nvgpu_err(g, "determine ppc configuration failed");
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
|
||||
|
||||
@@ -7130,16 +7189,28 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
|
||||
/* Find the offset in the GPCCS segment.*/
|
||||
if (i == gpc_num) {
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
|
||||
"gpc_priv_offset 0x%#08x",
|
||||
gpc_priv_offset);
|
||||
offset_to_segment = gpc_priv_offset *
|
||||
ctxsw_prog_ucode_header_size_in_bytes();
|
||||
|
||||
if (addr_type == CTXSW_ADDR_TYPE_TPC) {
|
||||
/*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/
|
||||
} else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
|
||||
(addr_type == CTXSW_ADDR_TYPE_ETPC)) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
|
||||
"egpc etpc offset_to_segment 0x%#08x",
|
||||
offset_to_segment);
|
||||
offset_to_segment +=
|
||||
((gr->ctx_vars.ctxsw_regs.tpc.count *
|
||||
num_tpcs) << 2);
|
||||
} else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
|
||||
/* The ucode stores TPC data before PPC data.
|
||||
* Advance offset past TPC data to PPC data. */
|
||||
offset_to_segment +=
|
||||
((gr->ctx_vars.ctxsw_regs.tpc.count *
|
||||
(((gr->ctx_vars.ctxsw_regs.tpc.count +
|
||||
gr->ctx_vars.ctxsw_regs.etpc.count) *
|
||||
num_tpcs) << 2);
|
||||
} else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
|
||||
/* The ucode stores TPC/PPC data before GPC data.
|
||||
@@ -7149,13 +7220,15 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
GPU_LIT_NUM_PES_PER_GPC);
|
||||
if (num_pes_per_gpc > 1) {
|
||||
offset_to_segment +=
|
||||
(((gr->ctx_vars.ctxsw_regs.tpc.count *
|
||||
num_tpcs) << 2) +
|
||||
((reg_list_ppc_count * num_ppcs) << 2));
|
||||
((((gr->ctx_vars.ctxsw_regs.tpc.count +
|
||||
gr->ctx_vars.ctxsw_regs.etpc.count) *
|
||||
num_tpcs) << 2) +
|
||||
((reg_list_ppc_count * num_ppcs) << 2));
|
||||
} else {
|
||||
offset_to_segment +=
|
||||
((gr->ctx_vars.ctxsw_regs.tpc.count *
|
||||
num_tpcs) << 2);
|
||||
(((gr->ctx_vars.ctxsw_regs.tpc.count +
|
||||
gr->ctx_vars.ctxsw_regs.etpc.count) *
|
||||
num_tpcs) << 2);
|
||||
}
|
||||
} else {
|
||||
gk20a_dbg_fn("Unknown address type.");
|
||||
|
||||
@@ -237,6 +237,8 @@ enum ctxsw_addr_type {
|
||||
CTXSW_ADDR_TYPE_PPC = 4,
|
||||
CTXSW_ADDR_TYPE_LTCS = 5,
|
||||
CTXSW_ADDR_TYPE_FBPA = 6,
|
||||
CTXSW_ADDR_TYPE_EGPC = 7,
|
||||
CTXSW_ADDR_TYPE_ETPC = 8,
|
||||
};
|
||||
|
||||
#define PRI_BROADCAST_FLAGS_NONE 0
|
||||
@@ -247,5 +249,7 @@ enum ctxsw_addr_type {
|
||||
#define PRI_BROADCAST_FLAGS_LTCS BIT(4)
|
||||
#define PRI_BROADCAST_FLAGS_LTSS BIT(5)
|
||||
#define PRI_BROADCAST_FLAGS_FBPA BIT(6)
|
||||
#define PRI_BROADCAST_FLAGS_EGPC BIT(7)
|
||||
#define PRI_BROADCAST_FLAGS_ETPC BIT(8)
|
||||
|
||||
#endif /* GR_PRI_GK20A_H */
|
||||
|
||||
Reference in New Issue
Block a user