gpu: nvgpu: support context regoptype for egpc/etpc

- add is_egpc_addr, is_etpc_addr and get_egpc_etpc_num gr ops
- add gr ops to decode egpc/etpc addresses and to create the egpc/etpc priv addr table
- add etpc as part of ctxsw_regs

JIRA GPUT19X-49
Bug  200311674
Bug  1960226

Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Change-Id: I9a8be1804a9354238de2441093b3b136321b7e53
Reviewed-on: https://git-master.nvidia.com/r/1522442
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Seema Khowala
2017-06-06 15:47:17 -07:00
committed by mobile promotions
parent 92d476bf27
commit f36e2a234b
4 changed files with 106 additions and 16 deletions

View File

@@ -216,8 +216,19 @@ struct gpu_ops {
u32 mode);
int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr,
struct gr_zcull_info *zcull_params);
int (*decode_egpc_addr)(struct gk20a *g,
u32 addr, int *addr_type,
u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags);
void (*egpc_etpc_priv_addr_table)(struct gk20a *g, u32 addr,
u32 gpc, u32 broadcast_flags, u32 *priv_addr_table,
u32 *priv_addr_table_index);
bool (*is_tpc_addr)(struct gk20a *g, u32 addr);
bool (*is_egpc_addr)(struct gk20a *g, u32 addr);
bool (*is_etpc_addr)(struct gk20a *g, u32 addr);
void (*get_egpc_etpc_num)(struct gk20a *g, u32 addr,
u32 *gpc_num, u32 *tpc_num);
u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
u32 (*get_egpc_base)(struct gk20a *g);
bool (*is_ltcs_ltss_addr)(struct gk20a *g, u32 addr);
bool (*is_ltcn_ltss_addr)(struct gk20a *g, u32 addr);
bool (*get_lts_in_ltc_shared_base)(void);

View File

@@ -76,6 +76,8 @@ int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr)
&g->gr.ctx_vars.ctxsw_regs.ppc.count);
gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ETPC_COUNT", 0,
&g->gr.ctx_vars.ctxsw_regs.etpc.count);
gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
&g->gr.ctx_vars.ctxsw_regs.ppc.count);
err |= !alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.inst);
err |= !alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.data);

View File

@@ -6236,6 +6236,10 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
return 0;
}
return 0;
} else if (g->ops.gr.is_egpc_addr && g->ops.gr.is_egpc_addr(g, addr)) {
return g->ops.gr.decode_egpc_addr(g,
addr, addr_type, gpc_num,
tpc_num, broadcast_flags);
} else {
*addr_type = CTXSW_ADDR_TYPE_SYS;
return 0;
@@ -6331,9 +6335,13 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
pri_gpc_addr(g, pri_gpccs_addr_mask(addr),
gpc_num);
}
}
if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
} else if (((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
(addr_type == CTXSW_ADDR_TYPE_ETPC)) &&
g->ops.gr.egpc_etpc_priv_addr_table) {
gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type : EGPC/ETPC");
g->ops.gr.egpc_etpc_priv_addr_table(g, addr, gpc_num,
broadcast_flags, priv_addr_table, &t);
} else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
g->ops.gr.split_lts_broadcast_addr(g, addr,
priv_addr_table, &t);
} else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
@@ -6354,8 +6362,8 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
pri_tpc_addr(g, pri_tpccs_addr_mask(addr),
gpc_num, tpc_num);
else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC)
err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
priv_addr_table, &t);
err = gr_gk20a_split_ppc_broadcast_addr(g,
addr, gpc_num, priv_addr_table, &t);
else
priv_addr_table[t++] = addr;
}
@@ -6382,8 +6390,10 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
/* implementation is crossed-up if either of these happens */
if (max_offsets > potential_offsets)
if (max_offsets > potential_offsets) {
gk20a_dbg_fn("max_offsets > potential_offsets");
return -EINVAL;
}
if (!g->gr.ctx_vars.golden_image_initialized)
return -ENODEV;
@@ -6401,6 +6411,8 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers);
if ((max_offsets > 1) && (num_registers > max_offsets)) {
gk20a_dbg_fn("max_offsets = %d, num_registers = %d",
max_offsets, num_registers);
err = -EINVAL;
goto cleanup;
}
@@ -6691,8 +6703,11 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
gk20a_dbg_info(" gpc = %d tpc = %d",
gpc_num, tpc_num);
} else
} else {
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
"extended region has tpc reg only");
return -EINVAL;
}
buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
/* note below is in words/num_registers */
@@ -6937,8 +6952,42 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
}
}
}
} else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
(addr_type == CTXSW_ADDR_TYPE_ETPC)) {
if (!(g->ops.gr.get_egpc_base))
return -EINVAL;
for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) {
reg = &g->gr.ctx_vars.ctxsw_regs.etpc.l[i];
address = reg->addr;
tpc_addr = pri_tpccs_addr_mask(address);
base_address = g->ops.gr.get_egpc_base(g) +
(gpc_num * gpc_stride) +
tpc_in_gpc_base +
(tpc_num * tpc_in_gpc_stride);
address = base_address + tpc_addr;
/*
* The data for the TPCs is interleaved in the context buffer.
* Example with num_tpcs = 2
* 0 1 2 3 4 5 6 7 8 9 10 11 ...
* 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
*/
tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);
if (pri_addr == address) {
*priv_offset = tpc_offset;
nvgpu_log(g,
gpu_dbg_fn | gpu_dbg_gpu_dbg,
"egpc/etpc priv_offset=0x%#08x",
*priv_offset);
return 0;
}
}
}
}
/* Process the PPC segment. */
if (addr_type == CTXSW_ADDR_TYPE_PPC) {
for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) {
@@ -6986,7 +7035,6 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
}
}
}
return -EINVAL;
}
@@ -7046,6 +7094,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
err = gr_gk20a_decode_priv_addr(g, addr, &addr_type,
&gpc_num, &tpc_num, &ppc_num, &be_num,
&broadcast_flags);
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
"addr_type = %d, broadcast_flags: %08x",
addr_type, broadcast_flags);
if (err)
return err;
@@ -7066,6 +7117,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
}
data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "sys_priv_offset=0x%x", sys_priv_offset);
/* If found in Ext buffer, ok.
* If it failed and we expected to find it there (quad offset)
@@ -7074,8 +7126,12 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
err = gr_gk20a_find_priv_offset_in_ext_buffer(g,
addr, is_quad, quad, context_buffer,
context_buffer_size, priv_offset);
if (!err || (err && is_quad))
if (!err || (err && is_quad)) {
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
"err = %d, is_quad = %s",
err, is_quad ? "true" : false);
return err;
}
if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
(addr_type == CTXSW_ADDR_TYPE_BE)) {
@@ -7116,8 +7172,11 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
err = gr_gk20a_determine_ppc_configuration(g, context,
&num_ppcs, &ppc_mask,
&reg_list_ppc_count);
if (err)
if (err) {
nvgpu_err(g, "determine ppc configuration failed");
return err;
}
num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
@@ -7130,16 +7189,28 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
/* Find the offset in the GPCCS segment.*/
if (i == gpc_num) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
"gpc_priv_offset 0x%#08x",
gpc_priv_offset);
offset_to_segment = gpc_priv_offset *
ctxsw_prog_ucode_header_size_in_bytes();
if (addr_type == CTXSW_ADDR_TYPE_TPC) {
/*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/
} else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
(addr_type == CTXSW_ADDR_TYPE_ETPC)) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
"egpc etpc offset_to_segment 0x%#08x",
offset_to_segment);
offset_to_segment +=
((gr->ctx_vars.ctxsw_regs.tpc.count *
num_tpcs) << 2);
} else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
/* The ucode stores TPC data before PPC data.
* Advance offset past TPC data to PPC data. */
offset_to_segment +=
((gr->ctx_vars.ctxsw_regs.tpc.count *
(((gr->ctx_vars.ctxsw_regs.tpc.count +
gr->ctx_vars.ctxsw_regs.etpc.count) *
num_tpcs) << 2);
} else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
/* The ucode stores TPC/PPC data before GPC data.
@@ -7149,13 +7220,15 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
GPU_LIT_NUM_PES_PER_GPC);
if (num_pes_per_gpc > 1) {
offset_to_segment +=
(((gr->ctx_vars.ctxsw_regs.tpc.count *
num_tpcs) << 2) +
((reg_list_ppc_count * num_ppcs) << 2));
((((gr->ctx_vars.ctxsw_regs.tpc.count +
gr->ctx_vars.ctxsw_regs.etpc.count) *
num_tpcs) << 2) +
((reg_list_ppc_count * num_ppcs) << 2));
} else {
offset_to_segment +=
((gr->ctx_vars.ctxsw_regs.tpc.count *
num_tpcs) << 2);
(((gr->ctx_vars.ctxsw_regs.tpc.count +
gr->ctx_vars.ctxsw_regs.etpc.count) *
num_tpcs) << 2);
}
} else {
gk20a_dbg_fn("Unknown address type.");

View File

@@ -237,6 +237,8 @@ enum ctxsw_addr_type {
CTXSW_ADDR_TYPE_PPC = 4,
CTXSW_ADDR_TYPE_LTCS = 5,
CTXSW_ADDR_TYPE_FBPA = 6,
CTXSW_ADDR_TYPE_EGPC = 7,
CTXSW_ADDR_TYPE_ETPC = 8,
};
#define PRI_BROADCAST_FLAGS_NONE 0
@@ -247,5 +249,7 @@ enum ctxsw_addr_type {
#define PRI_BROADCAST_FLAGS_LTCS BIT(4)
#define PRI_BROADCAST_FLAGS_LTSS BIT(5)
#define PRI_BROADCAST_FLAGS_FBPA BIT(6)
#define PRI_BROADCAST_FLAGS_EGPC BIT(7)
#define PRI_BROADCAST_FLAGS_ETPC BIT(8)
#endif /* GR_PRI_GK20A_H */