gpu: nvgpu: support for non-secure/secure ctxsw loading
Code for secure and non-secure ctxsw booting was spread across gr_gk20a.c
and gr_gm20b.c. With this change, that code is moved to the gr falcon unit.
Ctxsw loading is now handled by two common functions:
1. Non-secure boot:
   int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g);
2. Secure boot:
   int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g);
The gr op "int (*load_ctxsw_ucode)(struct gk20a *g);" is moved to the
gr falcon ops, and each chip HAL sets it to the secure or non-secure
variant (see the sketch after this list).
Non-secure booting: nvgpu_gr_falcon_load_ctxsw_ucode supports ctxsw loading
in two ways: bit-banging the ucode, or booting with the bootloader.
A. Common and HAL functions for non-secure bit-banging ctxsw loading:
Common: static void nvgpu_gr_falcon_load_dmem(struct gk20a *g) ->
HALs: void (*load_gpccs_dmem)(struct gk20a *g,
          const u32 *ucode_u32_data, u32 size);
      void (*load_fecs_dmem)(struct gk20a *g,
          const u32 *ucode_u32_data, u32 size);
Common: static void nvgpu_gr_falcon_load_imem(struct gk20a *g) ->
HALs: void (*load_gpccs_imem)(struct gk20a *g,
          const u32 *ucode_u32_data, u32 size);
      void (*load_fecs_imem)(struct gk20a *g,
          const u32 *ucode_u32_data, u32 size);
Other basic HALs:
void (*configure_fmodel)(struct gk20a *g); -> configure fmodel for ctxsw loading
void (*start_ucode)(struct gk20a *g); -> start running ctxsw ucode
B. Common and HAL functions for non-secure ctxsw loading with the bootloader:
First get the ctxsw ucode using nvgpu_gr_falcon_init_ctxsw_ucode, then
Common: static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g)
        void nvgpu_gr_falcon_bind_instblk(struct gk20a *g) ->
HAL: void (*bind_instblk)(struct gk20a *g, struct nvgpu_mem *mem, u64 inst_ptr);
Common: nvgpu_gr_falcon_load_ctxsw_ucode_segments ->
        nvgpu_gr_falcon_load_ctxsw_ucode_header ->
        nvgpu_gr_falcon_load_ctxsw_ucode_boot for both fecs and gpccs ->
HALs: void (*load_ctxsw_ucode_header)(struct gk20a *g, u32 reg_offset,
          u32 boot_signature, u32 addr_code32, u32 addr_data32,
          u32 code_size, u32 data_size);
      void (*load_ctxsw_ucode_boot)(struct gk20a *g, u32 reg_offset, u32 boot_entry,
          u32 addr_load32, u32 blocks, u32 dst);
Other basic HAL to get the gpccs start offset:
u32 (*get_gpccs_start_reg_offset)(void);
C. Secure booting is supported with gpmu and acr, with the following
additional common function in gr falcon:
static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g) ->
nvgpu_gr_falcon_bind_instblk and nvgpu_gr_falcon_load_ctxsw_ucode_segments
Additional basic HALs:
void (*start_gpccs)(struct gk20a *g);
void (*start_fecs)(struct gk20a *g);
The following gr op is removed, since chip HALs no longer need to set it:
void (*falcon_load_ucode)(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
This is now handled by the static common function:
static int nvgpu_gr_falcon_copy_ctxsw_ucode_segments(struct gk20a *g,
	struct nvgpu_mem *dst, struct gk20a_ctxsw_ucode_segments *segments,
	u32 *bootimage, u32 *code, u32 *data)
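
For illustration, a minimal sketch of how the pieces fit together, assuming
the ops layout introduced by this patch (the selection logic mirrors the
gm20b/gp10b/gv11b init_hal hunks in the diff below; the sketch itself is not
part of the change):

	/*
	 * Sketch: a chip HAL picks the common boot function based on
	 * priv security, and common ctxsw init then boots through the
	 * single gr falcon op.
	 */
	if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		/* secure boot via ACR / LS-PMU / SEC2 */
		gops->gr.falcon.load_ctxsw_ucode =
			nvgpu_gr_falcon_load_secure_ctxsw_ucode;
	} else {
		/* non-secure boot: bit-banging or bootloader */
		gops->gr.falcon.load_ctxsw_ucode =
			nvgpu_gr_falcon_load_ctxsw_ucode;
	}

	/* later, in gr init: */
	err = g->ops.gr.falcon.load_ctxsw_ucode(g);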
JIRA NVGPU-1881
Change-Id: I895a03faaf1a21286316befde24765c8b55075cf
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2083388
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
@@ -195,3 +195,237 @@ clean_up:
	return err;
}

static void nvgpu_gr_falcon_load_dmem(struct gk20a *g)
{
	u32 ucode_u32_size;
	const u32 *ucode_u32_data;

	nvgpu_log_fn(g, " ");

	ucode_u32_size = g->netlist_vars->ucode.gpccs.data.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.data.l;
	g->ops.gr.falcon.load_gpccs_dmem(g, ucode_u32_data, ucode_u32_size);

	ucode_u32_size = g->netlist_vars->ucode.fecs.data.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.data.l;
	g->ops.gr.falcon.load_fecs_dmem(g, ucode_u32_data, ucode_u32_size);

	nvgpu_log_fn(g, "done");
}

static void nvgpu_gr_falcon_load_imem(struct gk20a *g)
{
	u32 ucode_u32_size;
	const u32 *ucode_u32_data;

	nvgpu_log_fn(g, " ");

	ucode_u32_size = g->netlist_vars->ucode.gpccs.inst.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.inst.l;
	g->ops.gr.falcon.load_gpccs_imem(g, ucode_u32_data, ucode_u32_size);

	ucode_u32_size = g->netlist_vars->ucode.fecs.inst.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.inst.l;
	g->ops.gr.falcon.load_fecs_imem(g, ucode_u32_data, ucode_u32_size);

	nvgpu_log_fn(g, "done");
}

static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u64 inst_ptr;

	inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);

	g->ops.gr.falcon.bind_instblk(g, &ucode_info->inst_blk_desc,
					inst_ptr);
}

static void nvgpu_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
	u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments,
	u32 reg_offset)
{
	u32 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
	u32 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);

	g->ops.gr.falcon.load_ctxsw_ucode_header(g, reg_offset,
		segments->boot_signature, addr_code32, addr_data32,
		segments->code.size, segments->data.size);
}

static void nvgpu_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g,
	u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments,
	u32 reg_offset)
{
	u32 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
	u32 blocks = ((segments->boot.size + 0xFFU) & ~0xFFU) >> 8;
	u32 dst = segments->boot_imem_offset;

	g->ops.gr.falcon.load_ctxsw_ucode_boot(g, reg_offset,
		segments->boot_entry, addr_load32, blocks, dst);
}

static void nvgpu_gr_falcon_load_ctxsw_ucode_segments(
	struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
	/* Copy falcon bootloader into dmem */
	nvgpu_gr_falcon_load_ctxsw_ucode_header(g, addr_base,
						segments, reg_offset);
	nvgpu_gr_falcon_load_ctxsw_ucode_boot(g,
					addr_base, segments, reg_offset);
}

static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u64 addr_base = ucode_info->surface_desc.gpu_va;

	nvgpu_gr_falcon_bind_instblk(g);

	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
		&g->ctxsw_ucode_info.fecs, 0);

	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
		&g->ctxsw_ucode_info.gpccs,
		g->ops.gr.falcon.get_gpccs_start_reg_offset());
}

int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g)
{
	int err;

	nvgpu_log_fn(g, " ");

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
		g->ops.gr.falcon.configure_fmodel(g);
	}

	/*
	 * In case bootloader is not supported, revert to the old way of
	 * loading gr ucode, without the faster bootstrap routine.
	 */
	if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) {
		nvgpu_gr_falcon_load_dmem(g);
		nvgpu_gr_falcon_load_imem(g);
		g->ops.gr.falcon.start_ucode(g);
	} else {
		if (!g->gr.skip_ucode_init) {
			err = nvgpu_gr_falcon_init_ctxsw_ucode(g);
			if (err != 0) {
				return err;
			}
		}
		nvgpu_gr_falcon_load_with_bootloader(g);
		g->gr.skip_ucode_init = true;
	}
	nvgpu_log_fn(g, "done");
	return 0;
}

static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u64 addr_base = ucode_info->surface_desc.gpu_va;

	nvgpu_gr_falcon_bind_instblk(g);

	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
		&g->ctxsw_ucode_info.gpccs,
		g->ops.gr.falcon.get_gpccs_start_reg_offset());
}

int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g)
{
	int err = 0;
	u8 falcon_id_mask = 0;

	nvgpu_log_fn(g, " ");

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
		g->ops.gr.falcon.configure_fmodel(g);
	}

	g->pmu_lsf_loaded_falcon_id = 0;
	if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) {
		/* this must be recovery so bootstrap fecs and gpccs */
		if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
			nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
					BIT32(FALCON_ID_FECS));
		} else {
			/* bind WPR VA inst block */
			nvgpu_gr_falcon_bind_instblk(g);
			if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
				err = nvgpu_sec2_bootstrap_ls_falcons(g,
					&g->sec2, FALCON_ID_FECS);
				err = nvgpu_sec2_bootstrap_ls_falcons(g,
					&g->sec2, FALCON_ID_GPCCS);
			} else if (g->support_ls_pmu) {
				err = g->ops.pmu.load_lsfalcon_ucode(g,
						BIT32(FALCON_ID_FECS) |
						BIT32(FALCON_ID_GPCCS));
			} else {
				err = nvgpu_acr_bootstrap_hs_acr(g, g->acr);
				if (err != 0) {
					nvgpu_err(g,
						"ACR GR LSF bootstrap failed");
				}
			}
		}
		if (err != 0) {
			nvgpu_err(g, "Unable to recover GR falcon");
			return err;
		}

	} else {
		/* cold boot or rg exit */
		nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true);
		if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
			nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
		} else {
			/* bind WPR VA inst block */
			nvgpu_gr_falcon_bind_instblk(g);
			if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
					FALCON_ID_FECS)) {
				falcon_id_mask |= BIT8(FALCON_ID_FECS);
			}
			if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
					FALCON_ID_GPCCS)) {
				falcon_id_mask |= BIT8(FALCON_ID_GPCCS);
			}

			if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
				err = nvgpu_sec2_bootstrap_ls_falcons(g,
					&g->sec2, FALCON_ID_FECS);
				err = nvgpu_sec2_bootstrap_ls_falcons(g,
					&g->sec2, FALCON_ID_GPCCS);
			} else if (g->support_ls_pmu) {
				err = g->ops.pmu.load_lsfalcon_ucode(g,
						falcon_id_mask);
			} else {
				/* GR falcons bootstrapped by ACR */
				err = 0;
			}

			if (err != 0) {
				nvgpu_err(g, "Unable to boot GPCCS");
				return err;
			}
		}
	}

	g->ops.gr.falcon.start_gpccs(g);
	g->ops.gr.falcon.start_fecs(g);

	nvgpu_log_fn(g, "done");

	return 0;
}
@@ -127,8 +127,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
		.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
		.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
		.set_hww_esr_report_mask = NULL,
		.falcon_load_ucode = NULL,
		.load_ctxsw_ucode = NULL,
		.set_gpc_tpc_mask = NULL,
		.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
		.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -294,6 +292,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
			.get_zcull_info = vgpu_gr_get_zcull_info,
			.program_zcull_mapping = NULL,
		},
		.falcon = {
			.load_ctxsw_ucode = NULL,
		},
#ifdef CONFIG_GK20A_CTXSW_TRACE
	.fecs_trace = {
		.alloc_user_buffer = vgpu_alloc_user_buffer,
@@ -148,8 +148,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
		.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs,
		.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs,
		.set_hww_esr_report_mask = NULL,
		.falcon_load_ucode = NULL,
		.load_ctxsw_ucode = NULL,
		.set_gpc_tpc_mask = NULL,
		.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
		.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -344,6 +342,9 @@ static const struct gpu_ops vgpu_gv11b_ops = {
			.align_regs_perf_pma =
				gv100_gr_hwpm_map_align_regs_perf_pma,
		},
		.falcon = {
			.load_ctxsw_ucode = NULL,
		},
#ifdef CONFIG_GK20A_CTXSW_TRACE
	.fecs_trace = {
		.alloc_user_buffer = vgpu_alloc_user_buffer,
@@ -69,12 +69,8 @@
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>

#define BLK_SIZE (256U)
#define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX 1000U
#define CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT 10U
#define FECS_ARB_CMD_TIMEOUT_MAX 40
#define FECS_ARB_CMD_TIMEOUT_DEFAULT 2


static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
	struct gk20a *g, u32 curr_ctx, u32 *curr_tsgid);
@@ -183,126 +179,6 @@ static void gr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid,
		}
	}
}

static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
{
	u32 i, ucode_u32_size;
	const u32 *ucode_u32_data;
	u32 checksum;

	nvgpu_log_fn(g, " ");

	gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
					gr_gpccs_dmemc_blk_f(0) |
					gr_gpccs_dmemc_aincw_f(1)));

	ucode_u32_size = g->netlist_vars->ucode.gpccs.data.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.data.l;

	for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
		gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
		checksum += ucode_u32_data[i];
	}

	gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
					gr_fecs_dmemc_blk_f(0) |
					gr_fecs_dmemc_aincw_f(1)));

	ucode_u32_size = g->netlist_vars->ucode.fecs.data.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.data.l;

	for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
		gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
		checksum += ucode_u32_data[i];
	}
	nvgpu_log_fn(g, "done");
}

static void gr_gk20a_load_falcon_imem(struct gk20a *g)
{
	u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
	const u32 *ucode_u32_data;
	u32 tag, i, pad_start, pad_end;
	u32 checksum;

	nvgpu_log_fn(g, " ");

	cfg = gk20a_readl(g, gr_fecs_cfg_r());
	fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);

	cfg = gk20a_readl(g, gr_gpc0_cfg_r());
	gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);

	/* Use the broadcast address to access all of the GPCCS units. */
	gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
					gr_gpccs_imemc_blk_f(0) |
					gr_gpccs_imemc_aincw_f(1)));

	/* Setup the tags for the instruction memory. */
	tag = 0;
	gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));

	ucode_u32_size = g->netlist_vars->ucode.gpccs.inst.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.inst.l;

	for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
		if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
			tag++;
			gk20a_writel(g, gr_gpccs_imemt_r(0),
					gr_gpccs_imemt_tag_f(tag));
		}
		gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
		checksum += ucode_u32_data[i];
	}

	pad_start = i * 4U;
	pad_end = pad_start + (256U - pad_start % 256U) + 256U;
	for (i = pad_start;
	     (i < gpccs_imem_size * 256U) && (i < pad_end);
	     i += 4U) {
		if ((i != 0U) && ((i % 256U) == 0U)) {
			tag++;
			gk20a_writel(g, gr_gpccs_imemt_r(0),
					gr_gpccs_imemt_tag_f(tag));
		}
		gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
	}

	gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
					gr_fecs_imemc_blk_f(0) |
					gr_fecs_imemc_aincw_f(1)));

	/* Setup the tags for the instruction memory. */
	tag = 0;
	gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));

	ucode_u32_size = g->netlist_vars->ucode.fecs.inst.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.inst.l;

	for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
		if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
			tag++;
			gk20a_writel(g, gr_fecs_imemt_r(0),
					gr_fecs_imemt_tag_f(tag));
		}
		gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
		checksum += ucode_u32_data[i];
	}

	pad_start = i * 4U;
	pad_end = pad_start + (256U - pad_start % 256U) + 256U;
	for (i = pad_start;
	     (i < fecs_imem_size * 256U) && i < pad_end;
	     i += 4U) {
		if ((i != 0U) && ((i % 256U) == 0U)) {
			tag++;
			gk20a_writel(g, gr_fecs_imemt_r(0),
					gr_fecs_imemt_tag_f(tag));
		}
		gk20a_writel(g, gr_fecs_imemd_r(0), 0);
	}
}

int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
			    u32 *mailbox_ret, u32 opc_success,
			    u32 mailbox_ok, u32 opc_fail,
@@ -1076,288 +952,6 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
	return ret;
}

static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
{
	nvgpu_log_fn(g, " ");

	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U),
		gr_fecs_ctxsw_mailbox_clear_value_f(~U32(0U)));

	gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0U));
	gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0U));

	gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1U));
	gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1U));

	nvgpu_log_fn(g, "done");
}

static void gr_gk20a_wait_for_fecs_arb_idle(struct gk20a *g)
{
	int retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
	u32 val;

	val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
	while ((gr_fecs_arb_ctx_cmd_cmd_v(val) != 0U) && (retries != 0)) {
		nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
		retries--;
		val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
	}

	if (retries == 0) {
		nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x",
			  gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()));
	}

	retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
	while (((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
			gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
	       (retries != 0)) {
		nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
		retries--;
	}
	if (retries == 0) {
		nvgpu_err(g,
			  "arbiter idle timeout, fecs ctxsw status: 0x%08x",
			  gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
	}
}

void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	int retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
	u64 inst_ptr_shifted_u64;
	u32 inst_ptr_shifted_u32;

	while (((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
			gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
	       (retries != 0)) {
		nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
		retries--;
	}
	if (retries == 0) {
		nvgpu_err(g,
			  "arbiter idle timeout, status: %08x",
			  gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
	}

	gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);

	inst_ptr_shifted_u64 = nvgpu_inst_block_addr(g,
					&ucode_info->inst_blk_desc);
	inst_ptr_shifted_u64 >>= 12;
	BUG_ON(u64_hi32(inst_ptr_shifted_u64) != 0U);
	inst_ptr_shifted_u32 = (u32)inst_ptr_shifted_u64;
	gk20a_writel(g, gr_fecs_new_ctx_r(),
		     gr_fecs_new_ctx_ptr_f(inst_ptr_shifted_u32) |
		     nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
			gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
			gr_fecs_new_ctx_target_sys_mem_coh_f(),
			gr_fecs_new_ctx_target_vid_mem_f()) |
		     gr_fecs_new_ctx_valid_m());

	gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
		     gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr_shifted_u32) |
		     nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
			gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
			gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
			gr_fecs_arb_ctx_ptr_target_vid_mem_f()));

	gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);

	/* Wait for arbiter command to complete */
	gr_gk20a_wait_for_fecs_arb_idle(g);

	gk20a_writel(g, gr_fecs_current_ctx_r(),
		     gr_fecs_current_ctx_ptr_f(inst_ptr_shifted_u32) |
		     gr_fecs_current_ctx_target_m() |
		     gr_fecs_current_ctx_valid_m());
	/* Send command to arbiter to flush */
	gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s());

	gr_gk20a_wait_for_fecs_arb_idle(g);

}

void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
	u32 addr_code32;
	u32 addr_data32;

	addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
	addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);

	/*
	 * Copy falcon bootloader header into dmem at offset 0.
	 * Configure dmem port 0 for auto-incrementing writes starting at dmem
	 * offset 0.
	 */
	gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
			gr_fecs_dmemc_offs_f(0) |
			gr_fecs_dmemc_blk_f(0) |
			gr_fecs_dmemc_aincw_f(1));

	/* Write out the actual data */
	switch (segments->boot_signature) {
	case FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED:
	case FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE:
	case FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED:
	case FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED:
	case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED:
	case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED:
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		/* fallthrough */
	case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED:
	case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED:
	case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED:
	case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2:
	case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED:
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
			     addr_code32);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
			     segments->code.size);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
			     addr_data32);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
			     segments->data.size);
		break;
	case FALCON_UCODE_SIG_T12X_FECS_OLDER:
	case FALCON_UCODE_SIG_T12X_GPCCS_OLDER:
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
			     addr_code32);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
			     segments->code.size);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
			     addr_data32);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
			     segments->data.size);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
			     addr_code32);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		break;
	default:
		nvgpu_err(g,
			  "unknown falcon ucode boot signature 0x%08x"
			  " with reg_offset 0x%08x",
			  segments->boot_signature, reg_offset);
		BUG();
	}
}

void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
	u32 addr_load32;
	u32 blocks;
	u32 b;
	u32 dst;

	addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
	blocks = ((segments->boot.size + 0xFFU) & ~0xFFU) >> 8;

	/*
	 * Set the base FB address for the DMA transfer. Subtract off the 256
	 * byte IMEM block offset such that the relative FB and IMEM offsets
	 * match, allowing the IMEM tags to be properly created.
	 */

	dst = segments->boot_imem_offset;
	gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
			(addr_load32 - (dst >> 8)));

	for (b = 0; b < blocks; b++) {
		/* Setup destination IMEM offset */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
				dst + (b << 8));

		/* Setup source offset (relative to BASE) */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
				dst + (b << 8));

		gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
				gr_fecs_dmatrfcmd_imem_f(0x01) |
				gr_fecs_dmatrfcmd_write_f(0x00) |
				gr_fecs_dmatrfcmd_size_f(0x06) |
				gr_fecs_dmatrfcmd_ctxdma_f(0));
	}

	/* Specify the falcon boot vector */
	gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
			gr_fecs_bootvec_vec_f(segments->boot_entry));
}

static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u64 addr_base = ucode_info->surface_desc.gpu_va;

	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);

	gr_gk20a_load_falcon_bind_instblk(g);

	g->ops.gr.falcon_load_ucode(g, addr_base,
		&g->ctxsw_ucode_info.fecs, 0);

	g->ops.gr.falcon_load_ucode(g, addr_base,
		&g->ctxsw_ucode_info.gpccs,
		gr_gpcs_gpccs_falcon_hwcfg_r() -
		gr_fecs_falcon_hwcfg_r());
}

int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
{
	int err;

	nvgpu_log_fn(g, " ");

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
		gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
			gr_fecs_ctxsw_mailbox_value_f(0xc0de7777U));
		gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
			gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777U));
	}

	/*
	 * In case bootloader is not supported, revert to the old way of
	 * loading gr ucode, without the faster bootstrap routine.
	 */
	if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) {
		gr_gk20a_load_falcon_dmem(g);
		gr_gk20a_load_falcon_imem(g);
		gr_gk20a_start_falcon_ucode(g);
	} else {
		if (!g->gr.skip_ucode_init) {
			err = nvgpu_gr_falcon_init_ctxsw_ucode(g);

			if (err != 0) {
				return err;
			}
		}
		gr_gk20a_load_falcon_with_bootloader(g);
		g->gr.skip_ucode_init = true;
	}
	nvgpu_log_fn(g, "done");
	return 0;
}

static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
{
	int ret;
@@ -1939,7 +1533,7 @@ static int gr_gk20a_init_ctxsw(struct gk20a *g)
{
	int err = 0;

	err = g->ops.gr.load_ctxsw_ucode(g);
	err = g->ops.gr.falcon.load_ctxsw_ucode(g);
	if (err != 0) {
		goto out;
	}
@@ -341,14 +341,6 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
	u32 mode);

void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);


void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg);
int gr_gk20a_disable_ctxsw(struct gk20a *g);
int gr_gk20a_enable_ctxsw(struct gk20a *g);
@@ -340,23 +340,6 @@ void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
	}
}

void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
	gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
			gr_fecs_dmactl_require_ctx_f(0));

	/* Copy falcon bootloader into dmem */
	gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset);
	gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset);

	/* start the falcon immediately if PRIV security is disabled*/
	if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
				gr_fecs_cpuctl_startcpu_f(0x01));
	}
}

static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr)
{
	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
@@ -394,122 +377,6 @@ u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
	return 0;
}

static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u64 addr_base = ucode_info->surface_desc.gpu_va;

	gr_gk20a_load_falcon_bind_instblk(g);

	g->ops.gr.falcon_load_ucode(g, addr_base,
		&g->ctxsw_ucode_info.gpccs,
		gr_gpcs_gpccs_falcon_hwcfg_r() -
		gr_fecs_falcon_hwcfg_r());
}

int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
{
	int err = 0;
	u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
			gr_fecs_falcon_hwcfg_r();
	u8 falcon_id_mask = 0;

	nvgpu_log_fn(g, " ");

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
		gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
			gr_fecs_ctxsw_mailbox_value_f(0xc0de7777U));
		gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
			gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777U));
	}

	g->pmu_lsf_loaded_falcon_id = 0;
	if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) {
		/* this must be recovery so bootstrap fecs and gpccs */
		if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
			gr_gm20b_load_gpccs_with_bootloader(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
					BIT32(FALCON_ID_FECS));
		} else {
			/* bind WPR VA inst block */
			gr_gk20a_load_falcon_bind_instblk(g);
			if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
				err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2,
					FALCON_ID_FECS);
				err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2,
					FALCON_ID_GPCCS);
			} else if (g->support_ls_pmu) {
				err = g->ops.pmu.load_lsfalcon_ucode(g,
						BIT32(FALCON_ID_FECS) |
						BIT32(FALCON_ID_GPCCS));
			} else {
				err = nvgpu_acr_bootstrap_hs_acr(g, g->acr);
				if (err != 0) {
					nvgpu_err(g, "GR Recovery: ACR GR LSF bootstrap failed");
				}
			}
		}
		if (err != 0) {
			nvgpu_err(g, "Unable to recover GR falcon");
			return err;
		}

	} else {
		/* cold boot or rg exit */
		nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true);
		if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
			gr_gm20b_load_gpccs_with_bootloader(g);
		} else {
			/* bind WPR VA inst block */
			gr_gk20a_load_falcon_bind_instblk(g);
			if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr, FALCON_ID_FECS)) {
				falcon_id_mask |= BIT8(FALCON_ID_FECS);
			}
			if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr, FALCON_ID_GPCCS)) {
				falcon_id_mask |= BIT8(FALCON_ID_GPCCS);
			}

			if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
				err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2,
					FALCON_ID_FECS);
				err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2,
					FALCON_ID_GPCCS);
			} else if (g->support_ls_pmu) {
				err = g->ops.pmu.load_lsfalcon_ucode(g, falcon_id_mask);
			} else {
				/* GR falcons bootstrapped by ACR */
				err = 0;
			}

			if (err != 0) {
				nvgpu_err(g, "Unable to boot GPCCS");
				return err;
			}
		}
	}

	/*start gpccs */
	if (nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
		gk20a_writel(g, reg_offset +
			gr_fecs_cpuctl_alias_r(),
			gr_gpccs_cpuctl_startcpu_f(1U));
	} else {
		gk20a_writel(g, gr_gpccs_dmactl_r(),
			gr_gpccs_dmactl_require_ctx_f(0U));
		gk20a_writel(g, gr_gpccs_cpuctl_r(),
			gr_gpccs_cpuctl_startcpu_f(1U));
	}
	/* start fecs */
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U), ~U32(0U));
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1U), 1U);
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6U), 0xffffffffU);
	gk20a_writel(g, gr_fecs_cpuctl_alias_r(),
			gr_fecs_cpuctl_startcpu_f(1U));
	nvgpu_log_fn(g, "done");

	return 0;
}

void gr_gm20b_detect_sm_arch(struct gk20a *g)
{
	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());

@@ -68,7 +68,6 @@ void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
void gr_gm20b_detect_sm_arch(struct gk20a *g);
int gr_gm20b_init_ctxsw_preemption_mode(struct gk20a *g,
	struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
@@ -38,6 +38,7 @@
#include <nvgpu/regops.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/gr_falcon.h>

#include "hal/bus/bus_gm20b.h"
#include "hal/bus/bus_gk20a.h"
@@ -252,8 +253,6 @@ static const struct gpu_ops gm20b_ops = {
		.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
		.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
		.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
		.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
		.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
		.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -488,6 +487,23 @@ static const struct gpu_ops gm20b_ops = {
				gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size,
			.get_fecs_ctx_state_store_major_rev_id =
				gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id,
			.load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
			.load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
			.load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
			.load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
			.configure_fmodel = gm20b_gr_falcon_configure_fmodel,
			.start_ucode = gm20b_gr_falcon_start_ucode,
			.start_gpccs = gm20b_gr_falcon_start_gpccs,
			.start_fecs = gm20b_gr_falcon_start_fecs,
			.get_gpccs_start_reg_offset =
				gm20b_gr_falcon_get_gpccs_start_reg_offset,
			.bind_instblk = gm20b_gr_falcon_bind_instblk,
			.load_ctxsw_ucode_header =
				gm20b_gr_falcon_load_ctxsw_ucode_header,
			.load_ctxsw_ucode_boot =
				gm20b_gr_falcon_load_ctxsw_ucode_boot,
			.load_ctxsw_ucode =
				nvgpu_gr_falcon_load_ctxsw_ucode,
		},
	},
	.fb = {
@@ -1028,7 +1044,8 @@ int gm20b_init_hal(struct gk20a *g)
		gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
		gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode;

		gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
		gops->gr.falcon.load_ctxsw_ucode =
			nvgpu_gr_falcon_load_secure_ctxsw_ucode;
	} else {
		/* Inherit from gk20a */
		gops->pmu.pmu_setup_hw_and_bootstrap =
@@ -1037,8 +1054,6 @@ int gm20b_init_hal(struct gk20a *g)

		gops->pmu.load_lsfalcon_ucode = NULL;
		gops->pmu.init_wpr_region = NULL;

		gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
	}

	nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false);
@@ -38,6 +38,7 @@
#include <nvgpu/regops.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/fecs_trace.h>

#include "hal/bus/bus_gk20a.h"
@@ -277,8 +278,6 @@ static const struct gpu_ops gp10b_ops = {
		.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
		.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
		.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
		.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
		.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
		.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -566,6 +565,23 @@ static const struct gpu_ops gp10b_ops = {
				gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size,
			.get_fecs_ctx_state_store_major_rev_id =
				gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id,
			.load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
			.load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
			.load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
			.load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
			.configure_fmodel = gm20b_gr_falcon_configure_fmodel,
			.start_ucode = gm20b_gr_falcon_start_ucode,
			.start_gpccs = gm20b_gr_falcon_start_gpccs,
			.start_fecs = gm20b_gr_falcon_start_fecs,
			.get_gpccs_start_reg_offset =
				gm20b_gr_falcon_get_gpccs_start_reg_offset,
			.bind_instblk = gm20b_gr_falcon_bind_instblk,
			.load_ctxsw_ucode_header =
				gm20b_gr_falcon_load_ctxsw_ucode_header,
			.load_ctxsw_ucode_boot =
				gm20b_gr_falcon_load_ctxsw_ucode_boot,
			.load_ctxsw_ucode =
				nvgpu_gr_falcon_load_ctxsw_ucode,
		},
	},
	.fb = {
@@ -1107,7 +1123,8 @@ int gp10b_init_hal(struct gk20a *g)
		gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
		gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode;

		gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
		gops->gr.falcon.load_ctxsw_ucode =
			nvgpu_gr_falcon_load_secure_ctxsw_ucode;
	} else {
		/* Inherit from gk20a */
		gops->pmu.pmu_setup_hw_and_bootstrap =
@@ -1117,7 +1134,6 @@ int gp10b_init_hal(struct gk20a *g)
		gops->pmu.load_lsfalcon_ucode = NULL;
		gops->pmu.init_wpr_region = NULL;

		gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
	}

	nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false);
@@ -62,6 +62,7 @@
#include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
#include "hal/gr/config/gr_config_gm20b.h"
#include "hal/gr/config/gr_config_gv100.h"
#include "hal/gr/falcon/gr_falcon_gm20b.h"
#include "hal/gr/zbc/zbc_gp10b.h"
#include "hal/gr/zbc/zbc_gv11b.h"
#include "hal/gr/init/gr_init_gm20b.h"
@@ -72,7 +73,6 @@
#include "hal/gr/intr/gr_intr_gv11b.h"
#include "hal/gr/zcull/zcull_gm20b.h"
#include "hal/gr/zcull/zcull_gv11b.h"
#include "hal/gr/falcon/gr_falcon_gm20b.h"
#include "hal/gr/hwpm_map/hwpm_map_gv100.h"
#include "hal/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "hal/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
@@ -166,6 +166,7 @@
#include <nvgpu/regops.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/fecs_trace.h>

#include <nvgpu/hw/gv100/hw_proj_gv100.h>
@@ -389,8 +390,6 @@ static const struct gpu_ops gv100_ops = {
		.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs,
		.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs,
		.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask,
		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
		.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
		.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -710,6 +709,23 @@ static const struct gpu_ops gv100_ops = {
				gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size,
			.get_fecs_ctx_state_store_major_rev_id =
				gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id,
			.load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
			.load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
			.load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
			.load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
			.configure_fmodel = gm20b_gr_falcon_configure_fmodel,
			.start_ucode = gm20b_gr_falcon_start_ucode,
			.start_gpccs = gm20b_gr_falcon_start_gpccs,
			.start_fecs = gm20b_gr_falcon_start_fecs,
			.get_gpccs_start_reg_offset =
				gm20b_gr_falcon_get_gpccs_start_reg_offset,
			.bind_instblk = gm20b_gr_falcon_bind_instblk,
			.load_ctxsw_ucode_header =
				gm20b_gr_falcon_load_ctxsw_ucode_header,
			.load_ctxsw_ucode_boot =
				gm20b_gr_falcon_load_ctxsw_ucode_boot,
			.load_ctxsw_ucode =
				nvgpu_gr_falcon_load_secure_ctxsw_ucode,
		},
	},
	.fb = {
@@ -24,6 +24,7 @@
#include <nvgpu/gk20a.h>
#include <nvgpu/fuse.h>
#include <nvgpu/regops.h>
#include <nvgpu/gr/gr_falcon.h>

#include "hal/bus/bus_gk20a.h"
#include "hal/bus/bus_gp10b.h"
@@ -340,8 +341,6 @@ static const struct gpu_ops gv11b_ops = {
		.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs,
		.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs,
		.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask,
		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
		.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
		.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
		.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -669,6 +668,23 @@ static const struct gpu_ops gv11b_ops = {
				gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size,
			.get_fecs_ctx_state_store_major_rev_id =
				gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id,
			.load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
			.load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
			.load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
			.load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
			.configure_fmodel = gm20b_gr_falcon_configure_fmodel,
			.start_ucode = gm20b_gr_falcon_start_ucode,
			.start_gpccs = gm20b_gr_falcon_start_gpccs,
			.start_fecs = gm20b_gr_falcon_start_fecs,
			.get_gpccs_start_reg_offset =
				gm20b_gr_falcon_get_gpccs_start_reg_offset,
			.bind_instblk = gm20b_gr_falcon_bind_instblk,
			.load_ctxsw_ucode_header =
				gm20b_gr_falcon_load_ctxsw_ucode_header,
			.load_ctxsw_ucode_boot =
				gm20b_gr_falcon_load_ctxsw_ucode_boot,
			.load_ctxsw_ucode =
				nvgpu_gr_falcon_load_ctxsw_ucode,
		},
	},
	.fb = {
@@ -1257,7 +1273,8 @@ int gv11b_init_hal(struct gk20a *g)

	/* priv security dependent ops */
	if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
		gops->gr.falcon.load_ctxsw_ucode =
			nvgpu_gr_falcon_load_secure_ctxsw_ucode;
	} else {
		/* non-secure boot */
		gops->pmu.pmu_nsbootstrap = gv11b_pmu_bootstrap;
@@ -1267,7 +1284,6 @@ int gv11b_init_hal(struct gk20a *g)
		gops->pmu.load_lsfalcon_ucode = NULL;
		gops->pmu.init_wpr_region = NULL;

		gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
	}

	nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
@@ -21,6 +21,7 @@
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/gr/gr_falcon.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/debug.h>
|
||||
|
||||
@@ -28,6 +29,402 @@
|
||||
|
||||
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
|
||||
|
||||
#define FECS_ARB_CMD_TIMEOUT_MAX_US 40U
|
||||
#define FECS_ARB_CMD_TIMEOUT_DEFAULT_US 2U
|
||||
|
||||
void gm20b_gr_falcon_load_gpccs_dmem(struct gk20a *g,
|
||||
const u32 *ucode_u32_data, u32 ucode_u32_size)
|
||||
{
|
||||
u32 i, checksum;
|
||||
|
||||
/* enable access for gpccs dmem */
|
||||
nvgpu_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
|
||||
gr_gpccs_dmemc_blk_f(0) |
|
||||
gr_gpccs_dmemc_aincw_f(1)));
|
||||
|
||||
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
|
||||
nvgpu_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
|
||||
checksum += ucode_u32_data[i];
|
||||
}
|
||||
nvgpu_log_info(g, "gpccs dmem checksum: 0x%x", checksum);
|
||||
}
|
||||
|
||||
void gm20b_gr_falcon_load_fecs_dmem(struct gk20a *g,
|
||||
const u32 *ucode_u32_data, u32 ucode_u32_size)
|
||||
{
|
||||
u32 i, checksum;
|
||||
|
||||
/* set access for fecs dmem */
|
||||
nvgpu_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
|
||||
gr_fecs_dmemc_blk_f(0) |
|
||||
gr_fecs_dmemc_aincw_f(1)));
|
||||
|
||||
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
|
||||
nvgpu_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
|
||||
checksum += ucode_u32_data[i];
|
||||
}
|
||||
nvgpu_log_info(g, "fecs dmem checksum: 0x%x", checksum);
|
||||
}
|
||||
|
||||
void gm20b_gr_falcon_load_gpccs_imem(struct gk20a *g,
|
||||
const u32 *ucode_u32_data, u32 ucode_u32_size)
|
||||
{
|
||||
u32 cfg, gpccs_imem_size;
|
||||
u32 tag, i, pad_start, pad_end;
|
||||
u32 checksum;
|
||||
|
||||
/* enable access for gpccs imem */
|
||||
nvgpu_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
|
||||
gr_gpccs_imemc_blk_f(0) |
|
||||
gr_gpccs_imemc_aincw_f(1)));
|
||||
|
||||
cfg = nvgpu_readl(g, gr_gpc0_cfg_r());
|
||||
gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
|
||||
|
||||
/* Setup the tags for the instruction memory. */
|
||||
tag = 0;
|
||||
nvgpu_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
|
||||
|
||||
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
|
||||
if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
|
||||
tag++;
|
||||
nvgpu_writel(g, gr_gpccs_imemt_r(0),
|
||||
gr_gpccs_imemt_tag_f(tag));
|
||||
}
|
||||
nvgpu_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
|
||||
checksum += ucode_u32_data[i];
|
||||
}
|
||||
|
||||
pad_start = i * 4U;
|
||||
pad_end = pad_start + (256U - pad_start % 256U) + 256U;
|
||||
for (i = pad_start;
|
||||
(i < gpccs_imem_size * 256U) && (i < pad_end); i += 4U) {
|
||||
if ((i != 0U) && ((i % 256U) == 0U)) {
|
||||
tag++;
|
||||
nvgpu_writel(g, gr_gpccs_imemt_r(0),
|
||||
gr_gpccs_imemt_tag_f(tag));
|
||||
}
|
||||
nvgpu_writel(g, gr_gpccs_imemd_r(0), 0);
|
||||
}
|
||||
|
||||
nvgpu_log_info(g, "gpccs imem checksum: 0x%x", checksum);
|
||||
}
|
||||
|
||||
void gm20b_gr_falcon_load_fecs_imem(struct gk20a *g,
|
||||
const u32 *ucode_u32_data, u32 ucode_u32_size)
|
||||
{
|
||||
u32 cfg, fecs_imem_size;
|
||||
u32 tag, i, pad_start, pad_end;
|
||||
u32 checksum;
|
||||
|
||||
/* set access for fecs imem */
|
||||
nvgpu_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
|
||||
gr_fecs_imemc_blk_f(0) |
|
||||
gr_fecs_imemc_aincw_f(1)));
|
||||
|
||||
cfg = nvgpu_readl(g, gr_fecs_cfg_r());
|
||||
fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
|
||||
|
||||
/* Setup the tags for the instruction memory. */
|
||||
tag = 0;
|
||||
nvgpu_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
|
||||
|
||||
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
|
||||
if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
|
||||
tag++;
|
||||
nvgpu_writel(g, gr_fecs_imemt_r(0),
|
||||
gr_fecs_imemt_tag_f(tag));
|
||||
}
|
||||
nvgpu_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
|
||||
checksum += ucode_u32_data[i];
|
||||
}
|
||||
|
||||
pad_start = i * 4U;
|
||||
pad_end = pad_start + (256U - pad_start % 256U) + 256U;
|
||||
for (i = pad_start;
|
||||
(i < fecs_imem_size * 256U) && i < pad_end;
|
||||
i += 4U) {
|
||||
if ((i != 0U) && ((i % 256U) == 0U)) {
|
||||
tag++;
|
||||
nvgpu_writel(g, gr_fecs_imemt_r(0),
|
||||
gr_fecs_imemt_tag_f(tag));
|
||||
}
|
||||
nvgpu_writel(g, gr_fecs_imemd_r(0), 0);
|
||||
}
|
||||
nvgpu_log_info(g, "fecs imem checksum: 0x%x", checksum);
|
||||
}
|
||||
|
||||
u32 gm20b_gr_falcon_get_gpccs_start_reg_offset(void)
|
||||
{
|
||||
return (gr_gpcs_gpccs_falcon_hwcfg_r() - gr_fecs_falcon_hwcfg_r());
|
||||
}
|
||||
|
||||
void gm20b_gr_falcon_configure_fmodel(struct gk20a *g)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_r(7),
|
||||
gr_fecs_ctxsw_mailbox_value_f(0xc0de7777U));
|
||||
nvgpu_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
|
||||
gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777U));
|
||||
|
||||
}
|
||||
|
||||
void gm20b_gr_falcon_start_ucode(struct gk20a *g)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U),
|
||||
gr_fecs_ctxsw_mailbox_clear_value_f(~U32(0U)));
|
||||
|
||||
nvgpu_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0U));
|
||||
nvgpu_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0U));
|
||||
|
||||
nvgpu_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1U));
|
||||
nvgpu_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1U));
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
}
|
||||
|
||||
|
||||
void gm20b_gr_falcon_start_gpccs(struct gk20a *g)
|
||||
{
|
||||
u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
|
||||
gr_fecs_falcon_hwcfg_r();
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
|
||||
nvgpu_writel(g, reg_offset +
|
||||
gr_fecs_cpuctl_alias_r(),
|
||||
gr_gpccs_cpuctl_startcpu_f(1U));
|
||||
} else {
|
||||
nvgpu_writel(g, gr_gpccs_dmactl_r(),
|
||||
gr_gpccs_dmactl_require_ctx_f(0U));
|
||||
nvgpu_writel(g, gr_gpccs_cpuctl_r(),
|
||||
gr_gpccs_cpuctl_startcpu_f(1U));
|
||||
}
|
||||
}
|
||||
|
||||
void gm20b_gr_falcon_start_fecs(struct gk20a *g)
|
||||
{
|
||||
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U), ~U32(0U));
|
||||
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_r(1U), 1U);
|
||||
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6U), 0xffffffffU);
|
||||
nvgpu_writel(g, gr_fecs_cpuctl_alias_r(),
|
||||
gr_fecs_cpuctl_startcpu_f(1U));
|
||||
}
|
||||
|
||||
static void gm20b_gr_falcon_wait_for_fecs_arb_idle(struct gk20a *g)
|
||||
{
|
||||
int retries = FECS_ARB_CMD_TIMEOUT_MAX_US /
|
||||
FECS_ARB_CMD_TIMEOUT_DEFAULT_US;
|
||||
u32 val;
|
||||
|
||||
val = nvgpu_readl(g, gr_fecs_arb_ctx_cmd_r());
|
||||
while ((gr_fecs_arb_ctx_cmd_cmd_v(val) != 0U) && (retries != 0)) {
|
||||
nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT_US);
|
||||
retries--;
|
||||
val = nvgpu_readl(g, gr_fecs_arb_ctx_cmd_r());
|
||||
}
|
||||
|
||||
if (retries == 0) {
|
||||
nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x",
|
||||
nvgpu_readl(g, gr_fecs_arb_ctx_cmd_r()));
|
||||
}
|
||||
|
||||
retries = FECS_ARB_CMD_TIMEOUT_MAX_US /
|
||||
FECS_ARB_CMD_TIMEOUT_DEFAULT_US;
|
||||
while (((nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()) &
|
||||
gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
|
||||
(retries != 0)) {
|
||||
nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT_US);
|
||||
retries--;
|
||||
}
|
||||
if (retries == 0) {
|
||||
nvgpu_err(g,
|
||||
"arbiter idle timeout, fecs ctxsw status: 0x%08x",
|
||||
nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()));
|
||||
}
|
||||
}
|
||||
|
||||
void gm20b_gr_falcon_bind_instblk(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 inst_ptr)
|
||||
{
|
||||
u32 retries = FECS_ARB_CMD_TIMEOUT_MAX_US /
|
||||
FECS_ARB_CMD_TIMEOUT_DEFAULT_US;
|
||||
u32 inst_ptr_u32;
|
||||
|
||||
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
|
||||
|
||||
while (((nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()) &
|
||||
gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
|
||||
(retries != 0)) {
|
||||
nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT_US);
|
||||
retries--;
|
||||
}
|
||||
if (retries == 0) {
|
||||
nvgpu_err(g,
|
||||
"arbiter idle timeout, status: %08x",
|
||||
nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()));
|
||||
}
|
||||
|
||||
nvgpu_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
|
||||
|
||||
inst_ptr >>= 12;
|
||||
BUG_ON(u64_hi32(inst_ptr) != 0U);
|
||||
inst_ptr_u32 = (u32)inst_ptr;
|
||||
nvgpu_writel(g, gr_fecs_new_ctx_r(),
|
||||
gr_fecs_new_ctx_ptr_f(inst_ptr_u32) |
|
||||
nvgpu_aperture_mask(g, mem,
|
||||
gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
|
||||
gr_fecs_new_ctx_target_sys_mem_coh_f(),
|
||||
gr_fecs_new_ctx_target_vid_mem_f()) |
|
||||
gr_fecs_new_ctx_valid_m());
|
||||
|
||||
nvgpu_writel(g, gr_fecs_arb_ctx_ptr_r(),
|
||||
gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr_u32) |
|
||||
nvgpu_aperture_mask(g, mem,
|
||||
gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
|
||||
gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
|
||||
gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
|
||||
|
||||
nvgpu_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
|
||||
|
||||
/* Wait for arbiter command to complete */
|
||||
gm20b_gr_falcon_wait_for_fecs_arb_idle(g);
|
||||
|
||||
nvgpu_writel(g, gr_fecs_current_ctx_r(),
|
||||
gr_fecs_current_ctx_ptr_f(inst_ptr_u32) |
|
||||
gr_fecs_current_ctx_target_m() |
|
||||
gr_fecs_current_ctx_valid_m());
|
||||
/* Send command to arbiter to flush */
|
||||
nvgpu_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s());
|
||||
|
||||
gm20b_gr_falcon_wait_for_fecs_arb_idle(g);
|
||||
|
||||
}
|
||||
|
||||
void gm20b_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
	u32 reg_offset, u32 boot_signature, u32 addr_code32,
	u32 addr_data32, u32 code_size, u32 data_size)
{
	nvgpu_writel(g, reg_offset + gr_fecs_dmactl_r(),
			gr_fecs_dmactl_require_ctx_f(0));

	/*
	 * Copy falcon bootloader header into dmem at offset 0.
	 * Configure dmem port 0 for auto-incrementing writes starting at dmem
	 * offset 0.
	 */
	nvgpu_writel(g, reg_offset + gr_fecs_dmemc_r(0),
			gr_fecs_dmemc_offs_f(0) |
			gr_fecs_dmemc_blk_f(0) |
			gr_fecs_dmemc_aincw_f(1));

	/* Write out the actual data */
	switch (boot_signature) {
	case FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED:
	case FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE:
	case FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED:
	case FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED:
	case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED:
	case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED:
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		/* fallthrough */
	case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED:
	case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED:
	case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED:
	case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2:
	case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED:
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				addr_code32);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				code_size);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				addr_data32);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				data_size);
		break;
	case FALCON_UCODE_SIG_T12X_FECS_OLDER:
	case FALCON_UCODE_SIG_T12X_GPCCS_OLDER:
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				addr_code32);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				code_size);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				addr_data32);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				data_size);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				addr_code32);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
		break;
	default:
		nvgpu_err(g,
			"unknown falcon ucode boot signature 0x%08x"
			" with reg_offset 0x%08x",
			boot_signature, reg_offset);
		BUG();
	}
}

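Read together, the two non-OLDER branches above emit a fixed bootloader header into DMEM through the auto-incrementing port. A purely illustrative view of the *_WITHOUT_RESERVED word order follows; the struct and its field names are hypothetical, inferred only from the write sequence (the *_WITH_RESERVED signatures simply prepend four more zero words):

/* Hypothetical layout sketch; not an nvgpu definition. */
struct ctxsw_bootldr_hdr_sketch {
	u32 zero[4];		/* four leading zero words */
	u32 four;		/* constant 4 written as the fifth word */
	u32 addr_code32;	/* code address, as passed in */
	u32 zero_a;
	u32 code_size;
	u32 zero_b[3];
	u32 addr_data32;	/* data address, as passed in */
	u32 data_size;
};
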
void gm20b_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g, u32 reg_offset,
	u32 boot_entry, u32 addr_load32, u32 blocks, u32 dst)
{
	u32 b;

	/*
	 * Set the base FB address for the DMA transfer. Subtract off the 256
	 * byte IMEM block offset such that the relative FB and IMEM offsets
	 * match, allowing the IMEM tags to be properly created.
	 */
	nvgpu_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
			(addr_load32 - (dst >> 8)));

	for (b = 0; b < blocks; b++) {
		/* Setup destination IMEM offset */
		nvgpu_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
				dst + (b << 8));

		/* Setup source offset (relative to BASE) */
		nvgpu_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
				dst + (b << 8));

		nvgpu_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
				gr_fecs_dmatrfcmd_imem_f(0x01) |
				gr_fecs_dmatrfcmd_write_f(0x00) |
				gr_fecs_dmatrfcmd_size_f(0x06) |
				gr_fecs_dmatrfcmd_ctxdma_f(0));
	}

	/* Specify the falcon boot vector */
	nvgpu_writel(g, reg_offset + gr_fecs_bootvec_r(),
			gr_fecs_bootvec_vec_f(boot_entry));

	/* Start the falcon immediately if PRIV security is disabled */
	if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		nvgpu_writel(g, reg_offset + gr_fecs_cpuctl_r(),
				gr_fecs_cpuctl_startcpu_f(0x01));
	}
}

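A minimal caller-side sketch of how addr_load32, blocks, and dst relate to the 256-byte IMEM blocks used above (gr_fecs_dmatrfcmd_size_f(0x06) selects 256-byte DMA transfers). The helper below is hypothetical; it assumes the gk20a_ctxsw_ucode_segments layout referenced in the ops-table hunk later in this diff, with a boot segment plus boot_entry and boot_imem_offset fields:

static void load_boot_segment_sketch(struct gk20a *g, u32 reg_offset,
		struct gk20a_ctxsw_ucode_segments *segments, u64 addr_base)
{
	/* FB address of the boot image, expressed in 256-byte units */
	u32 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
	/* Round the boot image up to whole 256-byte IMEM blocks */
	u32 blocks = (segments->boot.size + 0xFFU) >> 8;

	g->ops.gr.falcon.load_ctxsw_ucode_boot(g, reg_offset,
			segments->boot_entry, addr_load32, blocks,
			segments->boot_imem_offset);
}
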
u32 gm20b_gr_falcon_fecs_base_addr(void)
{
	return gr_fecs_irqsset_r();
}

@@ -32,5 +32,26 @@ u32 gm20b_gr_falcon_gpccs_base_addr(void);
void gm20b_gr_falcon_fecs_dump_stats(struct gk20a *g);
u32 gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id(struct gk20a *g);
u32 gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size(void);
void gm20b_gr_falcon_load_gpccs_dmem(struct gk20a *g,
		const u32 *ucode_u32_data, u32 ucode_u32_size);
void gm20b_gr_falcon_load_fecs_dmem(struct gk20a *g,
		const u32 *ucode_u32_data, u32 ucode_u32_size);
void gm20b_gr_falcon_load_gpccs_imem(struct gk20a *g,
		const u32 *ucode_u32_data, u32 ucode_u32_size);
void gm20b_gr_falcon_load_fecs_imem(struct gk20a *g,
		const u32 *ucode_u32_data, u32 ucode_u32_size);
void gm20b_gr_falcon_configure_fmodel(struct gk20a *g);
void gm20b_gr_falcon_start_ucode(struct gk20a *g);
void gm20b_gr_falcon_start_gpccs(struct gk20a *g);
void gm20b_gr_falcon_start_fecs(struct gk20a *g);
u32 gm20b_gr_falcon_get_gpccs_start_reg_offset(void);
void gm20b_gr_falcon_bind_instblk(struct gk20a *g,
		struct nvgpu_mem *mem, u64 inst_ptr);
void gm20b_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
		u32 reg_offset, u32 boot_signature, u32 addr_code32,
		u32 addr_data32, u32 code_size, u32 data_size);
void gm20b_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g,
		u32 reg_offset, u32 boot_entry, u32 addr_load32, u32 blocks,
		u32 dst);

#endif /* NVGPU_GR_FALCON_GM20B_H */

@@ -285,11 +285,6 @@ struct gpu_ops {
				u32 *num_ovr_perf_regs,
				u32 **ovr_perf_regsr);
		void (*set_hww_esr_report_mask)(struct gk20a *g);
		void (*falcon_load_ucode)(struct gk20a *g,
				u64 addr_base,
				struct gk20a_ctxsw_ucode_segments *segments,
				u32 reg_offset);
		int (*load_ctxsw_ucode)(struct gk20a *g);
		void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
		int (*alloc_obj_ctx)(struct channel_gk20a *c,
				u32 class_num, u32 flags);
@@ -574,9 +569,33 @@ struct gpu_ops {
			u32 (*gpccs_base_addr)(void);
			void (*dump_stats)(struct gk20a *g);
			u32 (*fecs_ctxsw_mailbox_size)(void);
			u32 (*get_fecs_ctx_state_store_major_rev_id)(
					struct gk20a *g);
			void (*load_gpccs_dmem)(struct gk20a *g,
					const u32 *ucode_u32_data, u32 size);
			void (*load_fecs_dmem)(struct gk20a *g,
					const u32 *ucode_u32_data, u32 size);
			void (*load_gpccs_imem)(struct gk20a *g,
					const u32 *ucode_u32_data, u32 size);
			void (*load_fecs_imem)(struct gk20a *g,
					const u32 *ucode_u32_data, u32 size);
			void (*configure_fmodel)(struct gk20a *g);
			void (*start_ucode)(struct gk20a *g);
			void (*start_gpccs)(struct gk20a *g);
			void (*start_fecs)(struct gk20a *g);
			u32 (*get_gpccs_start_reg_offset)(void);
			void (*bind_instblk)(struct gk20a *g,
					struct nvgpu_mem *mem, u64 inst_ptr);
			void (*load_ctxsw_ucode_header)(struct gk20a *g,
					u32 reg_offset, u32 boot_signature,
					u32 addr_code32, u32 addr_data32,
					u32 code_size, u32 data_size);
			void (*load_ctxsw_ucode_boot)(struct gk20a *g,
					u32 reg_offset, u32 boot_entry,
					u32 addr_load32, u32 blocks, u32 dst);
			int (*load_ctxsw_ucode)(struct gk20a *g);
		} falcon;

#ifdef CONFIG_GK20A_CTXSW_TRACE
		struct {
			int (*init)(struct gk20a *g);

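Common code reaches the chip through this falcon ops table rather than calling the gm20b functions directly. A minimal dispatch sketch (the wrapper name and the fecs/gpccs pairing are illustrative, not the literal common-unit code):

static void gr_falcon_load_dmem_sketch(struct gk20a *g,
		const u32 *fecs_data, u32 fecs_size,
		const u32 *gpccs_data, u32 gpccs_size)
{
	/* Each op is filled in per chip, e.g. with the gm20b_gr_falcon_*
	 * helpers declared in the header hunk above. */
	g->ops.gr.falcon.load_fecs_dmem(g, fecs_data, fecs_size);
	g->ops.gr.falcon.load_gpccs_dmem(g, gpccs_data, gpccs_size);
}
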
@@ -27,6 +27,9 @@

struct gk20a;

int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g);
int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g);
int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g);
int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g);

#endif /* NVGPU_GR_FALCON_H */

@@ -186,6 +186,7 @@
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/pmu/perf.h>
#include <nvgpu/gr/gr_falcon.h>

#include <nvgpu/hw/tu104/hw_proj_tu104.h>
#include <nvgpu/hw/tu104/hw_top_tu104.h>
@@ -410,8 +411,6 @@ static const struct gpu_ops tu104_ops = {
		.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs,
		.get_sm_dsm_perf_ctrl_regs = gr_tu104_get_sm_dsm_perf_ctrl_regs,
		.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask,
		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
		.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
		.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -743,6 +742,23 @@ static const struct gpu_ops tu104_ops = {
			gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size,
		.get_fecs_ctx_state_store_major_rev_id =
			gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id,
		.load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
		.load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
		.load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
		.load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
		.configure_fmodel = gm20b_gr_falcon_configure_fmodel,
		.start_ucode = gm20b_gr_falcon_start_ucode,
		.start_gpccs = gm20b_gr_falcon_start_gpccs,
		.start_fecs = gm20b_gr_falcon_start_fecs,
		.get_gpccs_start_reg_offset =
			gm20b_gr_falcon_get_gpccs_start_reg_offset,
		.bind_instblk = gm20b_gr_falcon_bind_instblk,
		.load_ctxsw_ucode_header =
			gm20b_gr_falcon_load_ctxsw_ucode_header,
		.load_ctxsw_ucode_boot =
			gm20b_gr_falcon_load_ctxsw_ucode_boot,
		.load_ctxsw_ucode =
			nvgpu_gr_falcon_load_secure_ctxsw_ucode,
		},
	},
	.fb = {
@@ -1465,7 +1481,8 @@ int tu104_init_hal(struct gk20a *g)
	gops->cbc.ctrl = NULL;
	gops->cbc.alloc_comptags = NULL;

	gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
	gops->gr.falcon.load_ctxsw_ucode =
			nvgpu_gr_falcon_load_ctxsw_ucode;

	/* Disable pmu pstate, as there is no pmu support */
	nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);

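Taken together, the tu104 hunks select the secure loader statically and swap in the non-secure loader at init time. A condensed sketch of that selection; the surrounding nvgpu_is_enabled() check is an assumption about the enclosing code, since the hunk above shows only the override itself:

	/* Default, from the tu104_ops table */
	gops->gr.falcon.load_ctxsw_ucode =
			nvgpu_gr_falcon_load_secure_ctxsw_ucode;

	/* In tu104_init_hal(), when PRIV security is not in effect */
	if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		gops->gr.falcon.load_ctxsw_ucode =
				nvgpu_gr_falcon_load_ctxsw_ucode;
	}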