gpu: nvgpu: support for non-secure/secure ctxsw loading

Code for secure/non-secure ctxsw booting was spread across gr_gk20a.c
and gr_gm20b.c. With this change, that code is moved to the gr falcon unit.

Ctxsw loading is now supported with two common functions:
1. Non-secure boot:
   int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g);
2. Secure boot:
   int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g);

The gr ops function "int (*load_ctxsw_ucode)(struct gk20a *g);" is moved to
gr falcon ops, and each chip HAL now sets it to the secure or non-secure
boot function (condensed sketch below).
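For illustration, how a chip HAL ends up selecting the loader (this is the net
effect of the gm20b/gp10b/gv11b HAL changes in this patch, not a complete init
function):

	if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
		gops->gr.falcon.load_ctxsw_ucode =
			nvgpu_gr_falcon_load_secure_ctxsw_ucode;
	} else {
		gops->gr.falcon.load_ctxsw_ucode =
			nvgpu_gr_falcon_load_ctxsw_ucode;
	}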

Non-secure booting: nvgpu_gr_falcon_load_ctxsw_ucode supports ctxsw loading
in two ways: bit-banging the ucode into falcon memory, or booting with the
bootloader.

A. Common and hal functions for non-secure bit-banging ctxsw loading:
Common: static void nvgpu_gr_falcon_load_dmem(struct gk20a *g) ->
Hals: void (*load_gpccs_dmem)(struct gk20a *g,
			 const u32 *ucode_u32_data, u32 size);
      void (*load_fecs_dmem)(struct gk20a *g,
			const u32 *ucode_u32_data, u32 size);
Common: static void nvgpu_gr_falcon_load_imem(struct gk20a *g) ->
Hals:  void (*load_gpccs_imem)(struct gk20a *g,
			 const u32 *ucode_u32_data, u32 size);
       void (*load_fecs_imem)(struct gk20a *g,
			const u32 *ucode_u32_data, u32 size);
Other basic HALs:
void (*configure_fmodel)(struct gk20a *g); -> configure fmodel for ctxsw loading
void (*start_ucode)(struct gk20a *g);  -> start running ctxsw ucode
(A condensed sketch of this bit-banging path follows.)
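Simplified from the bit-banging branch of nvgpu_gr_falcon_load_ctxsw_ucode
added in this patch:

	if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) {
		/* bit-bang ucode words through the falcon dmem/imem ports */
		nvgpu_gr_falcon_load_dmem(g);	/* -> load_gpccs_dmem/load_fecs_dmem */
		nvgpu_gr_falcon_load_imem(g);	/* -> load_gpccs_imem/load_fecs_imem */
		g->ops.gr.falcon.start_ucode(g);
	}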

B. Common and hal functions for non-secure ctxsw loading with the bootloader
(a condensed sketch follows this list).
First the ctxsw ucode is set up with nvgpu_gr_falcon_init_ctxsw_ucode, then:
Common: static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g)
        void nvgpu_gr_falcon_bind_instblk(struct gk20a *g) ->
Hal: void (*bind_instblk)(struct gk20a *g, struct nvgpu_mem *mem, u64 inst_ptr);

Common: nvgpu_gr_falcon_load_ctxsw_ucode_segments ->
		nvgpu_gr_falcon_load_ctxsw_ucode_header ->
		nvgpu_gr_falcon_load_ctxsw_ucode_boot for both fecs and gpccs ->
Hals: void (*load_ctxsw_ucode_header)(struct gk20a *g, u32 reg_offset,
	u32 boot_signature, u32 addr_code32, u32 addr_data32,
	u32 code_size, u32 data_size);
void (*load_ctxsw_ucode_boot)(struct gk20a *g, u64 reg_offset, u32 boot_entry,
	u32 addr_load32, u32 blocks, u32 dst);
Other basic HAL to get gpccs start offset:
  u32 (*get_gpccs_start_reg_offset)(void);
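Simplified from nvgpu_gr_falcon_load_with_bootloader in this patch, where
addr_base is the GPU VA of the ctxsw ucode surface:

	nvgpu_gr_falcon_bind_instblk(g);	/* -> bind_instblk HAL */
	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
			&g->ctxsw_ucode_info.fecs, 0);
	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
			&g->ctxsw_ucode_info.gpccs,
			g->ops.gr.falcon.get_gpccs_start_reg_offset());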

C. Secure booting is supported with gpmu and acr, together with the following
additional common function in gr falcon (an outline of the cold-boot flow
follows this list).
static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g) ->
  nvgpu_gr_falcon_bind_instblk and  nvgpu_gr_falcon_load_ctxsw_ucode_segments
Additional basic hals:
void (*start_gpccs)(struct gk20a *g);
void (*start_fecs)(struct gk20a *g);
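Outline of the cold-boot case, simplified from
nvgpu_gr_falcon_load_secure_ctxsw_ucode in this patch:

	if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
		nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
	} else {
		/* bind WPR VA inst block, then bootstrap FECS/GPCCS
		 * through SEC2 RTOS, LS PMU or HS ACR */
		nvgpu_gr_falcon_bind_instblk(g);
	}
	g->ops.gr.falcon.start_gpccs(g);
	g->ops.gr.falcon.start_fecs(g);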

The following op is removed from gr ops, since chip hals no longer need to set it:
void (*falcon_load_ucode)(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);

Its work is now handled by a static common function:
static int nvgpu_gr_falcon_copy_ctxsw_ucode_segments(struct gk20a *g,
	struct nvgpu_mem *dst, struct gk20a_ctxsw_ucode_segments *segments,
	u32 *bootimage, u32 *code, u32 *data)
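The body of this helper is not part of the hunks shown below; as a rough
sketch it copies the three blobs into the ucode surface, along the lines of
the pre-existing gk20a code (the nvgpu_mem_wr_n usage is an assumption):

	/* hypothetical sketch only */
	nvgpu_mem_wr_n(g, dst, segments->boot.offset, bootimage,
			segments->boot.size);
	nvgpu_mem_wr_n(g, dst, segments->code.offset, code, segments->code.size);
	nvgpu_mem_wr_n(g, dst, segments->data.offset, data, segments->data.size);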

JIRA NVGPU-1881

Change-Id: I895a03faaf1a21286316befde24765c8b55075cf
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2083388
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Seshendra Gadagottu
Date: 2019-03-27 18:56:51 -07:00
Committed by: mobile promotions
Parent: b7835b5ead
Commit: 0f1726ae1f
16 changed files with 788 additions and 580 deletions

@@ -195,3 +195,237 @@ clean_up:
return err;
}
static void nvgpu_gr_falcon_load_dmem(struct gk20a *g)
{
u32 ucode_u32_size;
const u32 *ucode_u32_data;
nvgpu_log_fn(g, " ");
ucode_u32_size = g->netlist_vars->ucode.gpccs.data.count;
ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.data.l;
g->ops.gr.falcon.load_gpccs_dmem(g, ucode_u32_data, ucode_u32_size);
ucode_u32_size = g->netlist_vars->ucode.fecs.data.count;
ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.data.l;
g->ops.gr.falcon.load_fecs_dmem(g, ucode_u32_data, ucode_u32_size);
nvgpu_log_fn(g, "done");
}
static void nvgpu_gr_falcon_load_imem(struct gk20a *g)
{
u32 ucode_u32_size;
const u32 *ucode_u32_data;
nvgpu_log_fn(g, " ");
ucode_u32_size = g->netlist_vars->ucode.gpccs.inst.count;
ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.inst.l;
g->ops.gr.falcon.load_gpccs_imem(g, ucode_u32_data, ucode_u32_size);
ucode_u32_size = g->netlist_vars->ucode.fecs.inst.count;
ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.inst.l;
g->ops.gr.falcon.load_fecs_imem(g, ucode_u32_data, ucode_u32_size);
nvgpu_log_fn(g, "done");
}
static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g)
{
struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
u64 inst_ptr;
inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
g->ops.gr.falcon.bind_instblk(g, &ucode_info->inst_blk_desc,
inst_ptr);
}
static void nvgpu_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments,
u32 reg_offset)
{
u32 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
u32 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);
g->ops.gr.falcon.load_ctxsw_ucode_header(g, reg_offset,
segments->boot_signature, addr_code32, addr_data32,
segments->code.size, segments->data.size);
}
static void nvgpu_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g,
u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments,
u32 reg_offset)
{
u32 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
u32 blocks = ((segments->boot.size + 0xFFU) & ~0xFFU) >> 8;
u32 dst = segments->boot_imem_offset;
g->ops.gr.falcon.load_ctxsw_ucode_boot(g, reg_offset,
segments->boot_entry, addr_load32, blocks, dst);
}
static void nvgpu_gr_falcon_load_ctxsw_ucode_segments(
struct gk20a *g, u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
/* Copy falcon bootloader into dmem */
nvgpu_gr_falcon_load_ctxsw_ucode_header(g, addr_base,
segments, reg_offset);
nvgpu_gr_falcon_load_ctxsw_ucode_boot(g,
addr_base, segments, reg_offset);
}
static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g)
{
struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
u64 addr_base = ucode_info->surface_desc.gpu_va;
nvgpu_gr_falcon_bind_instblk(g);
nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
&g->ctxsw_ucode_info.fecs, 0);
nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
&g->ctxsw_ucode_info.gpccs,
g->ops.gr.falcon.get_gpccs_start_reg_offset());
}
int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g)
{
int err;
nvgpu_log_fn(g, " ");
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
g->ops.gr.falcon.configure_fmodel(g);
}
/*
* In case bootloader is not supported, revert to the old way of
* loading gr ucode, without the faster bootstrap routine.
*/
if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) {
nvgpu_gr_falcon_load_dmem(g);
nvgpu_gr_falcon_load_imem(g);
g->ops.gr.falcon.start_ucode(g);
} else {
if (!g->gr.skip_ucode_init) {
err = nvgpu_gr_falcon_init_ctxsw_ucode(g);
if (err != 0) {
return err;
}
}
nvgpu_gr_falcon_load_with_bootloader(g);
g->gr.skip_ucode_init = true;
}
nvgpu_log_fn(g, "done");
return 0;
}
static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g)
{
struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
u64 addr_base = ucode_info->surface_desc.gpu_va;
nvgpu_gr_falcon_bind_instblk(g);
nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
&g->ctxsw_ucode_info.gpccs,
g->ops.gr.falcon.get_gpccs_start_reg_offset());
}
int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g)
{
int err = 0;
u8 falcon_id_mask = 0;
nvgpu_log_fn(g, " ");
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
g->ops.gr.falcon.configure_fmodel(g);
}
g->pmu_lsf_loaded_falcon_id = 0;
if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) {
/* this must be recovery so bootstrap fecs and gpccs */
if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
err = g->ops.pmu.load_lsfalcon_ucode(g,
BIT32(FALCON_ID_FECS));
} else {
/* bind WPR VA inst block */
nvgpu_gr_falcon_bind_instblk(g);
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
err = nvgpu_sec2_bootstrap_ls_falcons(g,
&g->sec2, FALCON_ID_FECS);
err = nvgpu_sec2_bootstrap_ls_falcons(g,
&g->sec2, FALCON_ID_GPCCS);
} else if (g->support_ls_pmu) {
err = g->ops.pmu.load_lsfalcon_ucode(g,
BIT32(FALCON_ID_FECS) |
BIT32(FALCON_ID_GPCCS));
} else {
err = nvgpu_acr_bootstrap_hs_acr(g, g->acr);
if (err != 0) {
nvgpu_err(g,
"ACR GR LSF bootstrap failed");
}
}
}
if (err != 0) {
nvgpu_err(g, "Unable to recover GR falcon");
return err;
}
} else {
/* cold boot or rg exit */
nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true);
if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
} else {
/* bind WPR VA inst block */
nvgpu_gr_falcon_bind_instblk(g);
if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
FALCON_ID_FECS)) {
falcon_id_mask |= BIT8(FALCON_ID_FECS);
}
if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
FALCON_ID_GPCCS)) {
falcon_id_mask |= BIT8(FALCON_ID_GPCCS);
}
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
err = nvgpu_sec2_bootstrap_ls_falcons(g,
&g->sec2, FALCON_ID_FECS);
err = nvgpu_sec2_bootstrap_ls_falcons(g,
&g->sec2, FALCON_ID_GPCCS);
} else if (g->support_ls_pmu) {
err = g->ops.pmu.load_lsfalcon_ucode(g,
falcon_id_mask);
} else {
/* GR falcons bootstrapped by ACR */
err = 0;
}
if (err != 0) {
nvgpu_err(g, "Unable to boot GPCCS");
return err;
}
}
}
g->ops.gr.falcon.start_gpccs(g);
g->ops.gr.falcon.start_fecs(g);
nvgpu_log_fn(g, "done");
return 0;
}

@@ -127,8 +127,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
.set_hww_esr_report_mask = NULL,
.falcon_load_ucode = NULL,
.load_ctxsw_ucode = NULL,
.set_gpc_tpc_mask = NULL,
.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -294,6 +292,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.get_zcull_info = vgpu_gr_get_zcull_info,
.program_zcull_mapping = NULL,
},
.falcon = {
.load_ctxsw_ucode = NULL,
},
#ifdef CONFIG_GK20A_CTXSW_TRACE
.fecs_trace = {
.alloc_user_buffer = vgpu_alloc_user_buffer,

@@ -148,8 +148,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs,
.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs,
.set_hww_esr_report_mask = NULL,
.falcon_load_ucode = NULL,
.load_ctxsw_ucode = NULL,
.set_gpc_tpc_mask = NULL,
.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -344,6 +342,9 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.align_regs_perf_pma =
gv100_gr_hwpm_map_align_regs_perf_pma,
},
.falcon = {
.load_ctxsw_ucode = NULL,
},
#ifdef CONFIG_GK20A_CTXSW_TRACE
.fecs_trace = {
.alloc_user_buffer = vgpu_alloc_user_buffer,

@@ -69,12 +69,8 @@
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#define BLK_SIZE (256U)
#define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX 1000U
#define CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT 10U
#define FECS_ARB_CMD_TIMEOUT_MAX 40
#define FECS_ARB_CMD_TIMEOUT_DEFAULT 2
static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
struct gk20a *g, u32 curr_ctx, u32 *curr_tsgid);
@@ -183,126 +179,6 @@ static void gr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid,
}
}
}
static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
{
u32 i, ucode_u32_size;
const u32 *ucode_u32_data;
u32 checksum;
nvgpu_log_fn(g, " ");
gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
gr_gpccs_dmemc_blk_f(0) |
gr_gpccs_dmemc_aincw_f(1)));
ucode_u32_size = g->netlist_vars->ucode.gpccs.data.count;
ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.data.l;
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
checksum += ucode_u32_data[i];
}
gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
gr_fecs_dmemc_blk_f(0) |
gr_fecs_dmemc_aincw_f(1)));
ucode_u32_size = g->netlist_vars->ucode.fecs.data.count;
ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.data.l;
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
checksum += ucode_u32_data[i];
}
nvgpu_log_fn(g, "done");
}
static void gr_gk20a_load_falcon_imem(struct gk20a *g)
{
u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
const u32 *ucode_u32_data;
u32 tag, i, pad_start, pad_end;
u32 checksum;
nvgpu_log_fn(g, " ");
cfg = gk20a_readl(g, gr_fecs_cfg_r());
fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
cfg = gk20a_readl(g, gr_gpc0_cfg_r());
gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
/* Use the broadcast address to access all of the GPCCS units. */
gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
gr_gpccs_imemc_blk_f(0) |
gr_gpccs_imemc_aincw_f(1)));
/* Setup the tags for the instruction memory. */
tag = 0;
gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
ucode_u32_size = g->netlist_vars->ucode.gpccs.inst.count;
ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.inst.l;
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
tag++;
gk20a_writel(g, gr_gpccs_imemt_r(0),
gr_gpccs_imemt_tag_f(tag));
}
gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
checksum += ucode_u32_data[i];
}
pad_start = i * 4U;
pad_end = pad_start + (256U - pad_start % 256U) + 256U;
for (i = pad_start;
(i < gpccs_imem_size * 256U) && (i < pad_end);
i += 4U) {
if ((i != 0U) && ((i % 256U) == 0U)) {
tag++;
gk20a_writel(g, gr_gpccs_imemt_r(0),
gr_gpccs_imemt_tag_f(tag));
}
gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
}
gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
gr_fecs_imemc_blk_f(0) |
gr_fecs_imemc_aincw_f(1)));
/* Setup the tags for the instruction memory. */
tag = 0;
gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
ucode_u32_size = g->netlist_vars->ucode.fecs.inst.count;
ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.inst.l;
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
tag++;
gk20a_writel(g, gr_fecs_imemt_r(0),
gr_fecs_imemt_tag_f(tag));
}
gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
checksum += ucode_u32_data[i];
}
pad_start = i * 4U;
pad_end = pad_start + (256U - pad_start % 256U) + 256U;
for (i = pad_start;
(i < fecs_imem_size * 256U) && i < pad_end;
i += 4U) {
if ((i != 0U) && ((i % 256U) == 0U)) {
tag++;
gk20a_writel(g, gr_fecs_imemt_r(0),
gr_fecs_imemt_tag_f(tag));
}
gk20a_writel(g, gr_fecs_imemd_r(0), 0);
}
}
int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
u32 *mailbox_ret, u32 opc_success,
u32 mailbox_ok, u32 opc_fail,
@@ -1076,288 +952,6 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
return ret;
}
static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
{
nvgpu_log_fn(g, " ");
gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U),
gr_fecs_ctxsw_mailbox_clear_value_f(~U32(0U)));
gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0U));
gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0U));
gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1U));
gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1U));
nvgpu_log_fn(g, "done");
}
static void gr_gk20a_wait_for_fecs_arb_idle(struct gk20a *g)
{
int retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
u32 val;
val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
while ((gr_fecs_arb_ctx_cmd_cmd_v(val) != 0U) && (retries != 0)) {
nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
retries--;
val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
}
if (retries == 0) {
nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x",
gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()));
}
retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
while (((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
(retries != 0)) {
nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
retries--;
}
if (retries == 0) {
nvgpu_err(g,
"arbiter idle timeout, fecs ctxsw status: 0x%08x",
gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
}
}
void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
{
struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
int retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT;
u64 inst_ptr_shifted_u64;
u32 inst_ptr_shifted_u32;
while (((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
(retries != 0)) {
nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT);
retries--;
}
if (retries == 0) {
nvgpu_err(g,
"arbiter idle timeout, status: %08x",
gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
}
gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
inst_ptr_shifted_u64 = nvgpu_inst_block_addr(g,
&ucode_info->inst_blk_desc);
inst_ptr_shifted_u64 >>= 12;
BUG_ON(u64_hi32(inst_ptr_shifted_u64) != 0U);
inst_ptr_shifted_u32 = (u32)inst_ptr_shifted_u64;
gk20a_writel(g, gr_fecs_new_ctx_r(),
gr_fecs_new_ctx_ptr_f(inst_ptr_shifted_u32) |
nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
gr_fecs_new_ctx_target_sys_mem_coh_f(),
gr_fecs_new_ctx_target_vid_mem_f()) |
gr_fecs_new_ctx_valid_m());
gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr_shifted_u32) |
nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
/* Wait for arbiter command to complete */
gr_gk20a_wait_for_fecs_arb_idle(g);
gk20a_writel(g, gr_fecs_current_ctx_r(),
gr_fecs_current_ctx_ptr_f(inst_ptr_shifted_u32) |
gr_fecs_current_ctx_target_m() |
gr_fecs_current_ctx_valid_m());
/* Send command to arbiter to flush */
gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s());
gr_gk20a_wait_for_fecs_arb_idle(g);
}
void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
u32 addr_code32;
u32 addr_data32;
addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);
/*
* Copy falcon bootloader header into dmem at offset 0.
* Configure dmem port 0 for auto-incrementing writes starting at dmem
* offset 0.
*/
gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
gr_fecs_dmemc_offs_f(0) |
gr_fecs_dmemc_blk_f(0) |
gr_fecs_dmemc_aincw_f(1));
/* Write out the actual data */
switch (segments->boot_signature) {
case FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED:
case FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE:
case FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED:
case FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED:
case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED:
case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED:
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
/* fallthrough */
case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED:
case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED:
case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED:
case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2:
case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED:
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
addr_code32);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
segments->code.size);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
addr_data32);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
segments->data.size);
break;
case FALCON_UCODE_SIG_T12X_FECS_OLDER:
case FALCON_UCODE_SIG_T12X_GPCCS_OLDER:
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
addr_code32);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
segments->code.size);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
addr_data32);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
segments->data.size);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
addr_code32);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
break;
default:
nvgpu_err(g,
"unknown falcon ucode boot signature 0x%08x"
" with reg_offset 0x%08x",
segments->boot_signature, reg_offset);
BUG();
}
}
void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
u32 addr_load32;
u32 blocks;
u32 b;
u32 dst;
addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
blocks = ((segments->boot.size + 0xFFU) & ~0xFFU) >> 8;
/*
* Set the base FB address for the DMA transfer. Subtract off the 256
* byte IMEM block offset such that the relative FB and IMEM offsets
* match, allowing the IMEM tags to be properly created.
*/
dst = segments->boot_imem_offset;
gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
(addr_load32 - (dst >> 8)));
for (b = 0; b < blocks; b++) {
/* Setup destination IMEM offset */
gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
dst + (b << 8));
/* Setup source offset (relative to BASE) */
gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
dst + (b << 8));
gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
gr_fecs_dmatrfcmd_imem_f(0x01) |
gr_fecs_dmatrfcmd_write_f(0x00) |
gr_fecs_dmatrfcmd_size_f(0x06) |
gr_fecs_dmatrfcmd_ctxdma_f(0));
}
/* Specify the falcon boot vector */
gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
gr_fecs_bootvec_vec_f(segments->boot_entry));
}
static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
{
struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
u64 addr_base = ucode_info->surface_desc.gpu_va;
gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
gr_gk20a_load_falcon_bind_instblk(g);
g->ops.gr.falcon_load_ucode(g, addr_base,
&g->ctxsw_ucode_info.fecs, 0);
g->ops.gr.falcon_load_ucode(g, addr_base,
&g->ctxsw_ucode_info.gpccs,
gr_gpcs_gpccs_falcon_hwcfg_r() -
gr_fecs_falcon_hwcfg_r());
}
int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
{
int err;
nvgpu_log_fn(g, " ");
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
gr_fecs_ctxsw_mailbox_value_f(0xc0de7777U));
gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777U));
}
/*
* In case bootloader is not supported, revert to the old way of
* loading gr ucode, without the faster bootstrap routine.
*/
if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) {
gr_gk20a_load_falcon_dmem(g);
gr_gk20a_load_falcon_imem(g);
gr_gk20a_start_falcon_ucode(g);
} else {
if (!g->gr.skip_ucode_init) {
err = nvgpu_gr_falcon_init_ctxsw_ucode(g);
if (err != 0) {
return err;
}
}
gr_gk20a_load_falcon_with_bootloader(g);
g->gr.skip_ucode_init = true;
}
nvgpu_log_fn(g, "done");
return 0;
}
static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
{
int ret;
@@ -1939,7 +1533,7 @@ static int gr_gk20a_init_ctxsw(struct gk20a *g)
{
int err = 0;
err = g->ops.gr.load_ctxsw_ucode(g);
err = g->ops.gr.falcon.load_ctxsw_ucode(g);
if (err != 0) {
goto out;
}

@@ -341,14 +341,6 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
u32 mode);
void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg);
int gr_gk20a_disable_ctxsw(struct gk20a *g);
int gr_gk20a_enable_ctxsw(struct gk20a *g);

@@ -340,23 +340,6 @@ void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
}
}
void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
gr_fecs_dmactl_require_ctx_f(0));
/* Copy falcon bootloader into dmem */
gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset);
gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset);
/* start the falcon immediately if PRIV security is disabled*/
if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
gr_fecs_cpuctl_startcpu_f(0x01));
}
}
static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr)
{
u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
@@ -394,122 +377,6 @@ u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
return 0;
}
static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
{
struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
u64 addr_base = ucode_info->surface_desc.gpu_va;
gr_gk20a_load_falcon_bind_instblk(g);
g->ops.gr.falcon_load_ucode(g, addr_base,
&g->ctxsw_ucode_info.gpccs,
gr_gpcs_gpccs_falcon_hwcfg_r() -
gr_fecs_falcon_hwcfg_r());
}
int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
{
int err = 0;
u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
gr_fecs_falcon_hwcfg_r();
u8 falcon_id_mask = 0;
nvgpu_log_fn(g, " ");
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
gr_fecs_ctxsw_mailbox_value_f(0xc0de7777U));
gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777U));
}
g->pmu_lsf_loaded_falcon_id = 0;
if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) {
/* this must be recovery so bootstrap fecs and gpccs */
if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
gr_gm20b_load_gpccs_with_bootloader(g);
err = g->ops.pmu.load_lsfalcon_ucode(g,
BIT32(FALCON_ID_FECS));
} else {
/* bind WPR VA inst block */
gr_gk20a_load_falcon_bind_instblk(g);
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2,
FALCON_ID_FECS);
err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2,
FALCON_ID_GPCCS);
} else if (g->support_ls_pmu) {
err = g->ops.pmu.load_lsfalcon_ucode(g,
BIT32(FALCON_ID_FECS) |
BIT32(FALCON_ID_GPCCS));
} else {
err = nvgpu_acr_bootstrap_hs_acr(g, g->acr);
if (err != 0) {
nvgpu_err(g, "GR Recovery: ACR GR LSF bootstrap failed");
}
}
}
if (err != 0) {
nvgpu_err(g, "Unable to recover GR falcon");
return err;
}
} else {
/* cold boot or rg exit */
nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true);
if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
gr_gm20b_load_gpccs_with_bootloader(g);
} else {
/* bind WPR VA inst block */
gr_gk20a_load_falcon_bind_instblk(g);
if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr, FALCON_ID_FECS)) {
falcon_id_mask |= BIT8(FALCON_ID_FECS);
}
if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr, FALCON_ID_GPCCS)) {
falcon_id_mask |= BIT8(FALCON_ID_GPCCS);
}
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2,
FALCON_ID_FECS);
err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2,
FALCON_ID_GPCCS);
} else if (g->support_ls_pmu) {
err = g->ops.pmu.load_lsfalcon_ucode(g, falcon_id_mask);
} else {
/* GR falcons bootstrapped by ACR */
err = 0;
}
if (err != 0) {
nvgpu_err(g, "Unable to boot GPCCS");
return err;
}
}
}
/*start gpccs */
if (nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
gk20a_writel(g, reg_offset +
gr_fecs_cpuctl_alias_r(),
gr_gpccs_cpuctl_startcpu_f(1U));
} else {
gk20a_writel(g, gr_gpccs_dmactl_r(),
gr_gpccs_dmactl_require_ctx_f(0U));
gk20a_writel(g, gr_gpccs_cpuctl_r(),
gr_gpccs_cpuctl_startcpu_f(1U));
}
/* start fecs */
gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U), ~U32(0U));
gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1U), 1U);
gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6U), 0xffffffffU);
gk20a_writel(g, gr_fecs_cpuctl_alias_r(),
gr_fecs_cpuctl_startcpu_f(1U));
nvgpu_log_fn(g, "done");
return 0;
}
void gr_gm20b_detect_sm_arch(struct gk20a *g)
{
u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());

@@ -68,7 +68,6 @@ void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
void gr_gm20b_detect_sm_arch(struct gk20a *g);
int gr_gm20b_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,

@@ -38,6 +38,7 @@
#include <nvgpu/regops.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/gr_falcon.h>
#include "hal/bus/bus_gm20b.h"
#include "hal/bus/bus_gk20a.h"
@@ -252,8 +253,6 @@ static const struct gpu_ops gm20b_ops = {
.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -488,6 +487,23 @@ static const struct gpu_ops gm20b_ops = {
gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size,
.get_fecs_ctx_state_store_major_rev_id =
gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id,
.load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
.load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
.load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
.load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
.configure_fmodel = gm20b_gr_falcon_configure_fmodel,
.start_ucode = gm20b_gr_falcon_start_ucode,
.start_gpccs = gm20b_gr_falcon_start_gpccs,
.start_fecs = gm20b_gr_falcon_start_fecs,
.get_gpccs_start_reg_offset =
gm20b_gr_falcon_get_gpccs_start_reg_offset,
.bind_instblk = gm20b_gr_falcon_bind_instblk,
.load_ctxsw_ucode_header =
gm20b_gr_falcon_load_ctxsw_ucode_header,
.load_ctxsw_ucode_boot =
gm20b_gr_falcon_load_ctxsw_ucode_boot,
.load_ctxsw_ucode =
nvgpu_gr_falcon_load_ctxsw_ucode,
},
},
.fb = {
@@ -1028,7 +1044,8 @@ int gm20b_init_hal(struct gk20a *g)
gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode;
gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
gops->gr.falcon.load_ctxsw_ucode =
nvgpu_gr_falcon_load_secure_ctxsw_ucode;
} else {
/* Inherit from gk20a */
gops->pmu.pmu_setup_hw_and_bootstrap =
@@ -1037,8 +1054,6 @@ int gm20b_init_hal(struct gk20a *g)
gops->pmu.load_lsfalcon_ucode = NULL;
gops->pmu.init_wpr_region = NULL;
gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
}
nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false);

@@ -38,6 +38,7 @@
#include <nvgpu/regops.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/fecs_trace.h>
#include "hal/bus/bus_gk20a.h"
@@ -277,8 +278,6 @@ static const struct gpu_ops gp10b_ops = {
.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -566,6 +565,23 @@ static const struct gpu_ops gp10b_ops = {
gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size,
.get_fecs_ctx_state_store_major_rev_id =
gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id,
.load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
.load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
.load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
.load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
.configure_fmodel = gm20b_gr_falcon_configure_fmodel,
.start_ucode = gm20b_gr_falcon_start_ucode,
.start_gpccs = gm20b_gr_falcon_start_gpccs,
.start_fecs = gm20b_gr_falcon_start_fecs,
.get_gpccs_start_reg_offset =
gm20b_gr_falcon_get_gpccs_start_reg_offset,
.bind_instblk = gm20b_gr_falcon_bind_instblk,
.load_ctxsw_ucode_header =
gm20b_gr_falcon_load_ctxsw_ucode_header,
.load_ctxsw_ucode_boot =
gm20b_gr_falcon_load_ctxsw_ucode_boot,
.load_ctxsw_ucode =
nvgpu_gr_falcon_load_ctxsw_ucode,
},
},
.fb = {
@@ -1107,7 +1123,8 @@ int gp10b_init_hal(struct gk20a *g)
gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode;
gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
gops->gr.falcon.load_ctxsw_ucode =
nvgpu_gr_falcon_load_secure_ctxsw_ucode;
} else {
/* Inherit from gk20a */
gops->pmu.pmu_setup_hw_and_bootstrap =
@@ -1117,7 +1134,6 @@ int gp10b_init_hal(struct gk20a *g)
gops->pmu.load_lsfalcon_ucode = NULL;
gops->pmu.init_wpr_region = NULL;
gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
}
nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false);

@@ -62,6 +62,7 @@
#include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
#include "hal/gr/config/gr_config_gm20b.h"
#include "hal/gr/config/gr_config_gv100.h"
#include "hal/gr/falcon/gr_falcon_gm20b.h"
#include "hal/gr/zbc/zbc_gp10b.h"
#include "hal/gr/zbc/zbc_gv11b.h"
#include "hal/gr/init/gr_init_gm20b.h"
@@ -72,7 +73,6 @@
#include "hal/gr/intr/gr_intr_gv11b.h"
#include "hal/gr/zcull/zcull_gm20b.h"
#include "hal/gr/zcull/zcull_gv11b.h"
#include "hal/gr/falcon/gr_falcon_gm20b.h"
#include "hal/gr/hwpm_map/hwpm_map_gv100.h"
#include "hal/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "hal/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
@@ -166,6 +166,7 @@
#include <nvgpu/regops.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/hw/gv100/hw_proj_gv100.h>
@@ -389,8 +390,6 @@ static const struct gpu_ops gv100_ops = {
.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs,
.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs,
.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask,
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -710,6 +709,23 @@ static const struct gpu_ops gv100_ops = {
gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size,
.get_fecs_ctx_state_store_major_rev_id =
gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id,
.load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
.load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
.load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
.load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
.configure_fmodel = gm20b_gr_falcon_configure_fmodel,
.start_ucode = gm20b_gr_falcon_start_ucode,
.start_gpccs = gm20b_gr_falcon_start_gpccs,
.start_fecs = gm20b_gr_falcon_start_fecs,
.get_gpccs_start_reg_offset =
gm20b_gr_falcon_get_gpccs_start_reg_offset,
.bind_instblk = gm20b_gr_falcon_bind_instblk,
.load_ctxsw_ucode_header =
gm20b_gr_falcon_load_ctxsw_ucode_header,
.load_ctxsw_ucode_boot =
gm20b_gr_falcon_load_ctxsw_ucode_boot,
.load_ctxsw_ucode =
nvgpu_gr_falcon_load_secure_ctxsw_ucode,
},
},
.fb = {

@@ -24,6 +24,7 @@
#include <nvgpu/gk20a.h>
#include <nvgpu/fuse.h>
#include <nvgpu/regops.h>
#include <nvgpu/gr/gr_falcon.h>
#include "hal/bus/bus_gk20a.h"
#include "hal/bus/bus_gp10b.h"
@@ -340,8 +341,6 @@ static const struct gpu_ops gv11b_ops = {
.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs,
.get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs,
.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask,
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -669,6 +668,23 @@ static const struct gpu_ops gv11b_ops = {
gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size,
.get_fecs_ctx_state_store_major_rev_id =
gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id,
.load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
.load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
.load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
.load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
.configure_fmodel = gm20b_gr_falcon_configure_fmodel,
.start_ucode = gm20b_gr_falcon_start_ucode,
.start_gpccs = gm20b_gr_falcon_start_gpccs,
.start_fecs = gm20b_gr_falcon_start_fecs,
.get_gpccs_start_reg_offset =
gm20b_gr_falcon_get_gpccs_start_reg_offset,
.bind_instblk = gm20b_gr_falcon_bind_instblk,
.load_ctxsw_ucode_header =
gm20b_gr_falcon_load_ctxsw_ucode_header,
.load_ctxsw_ucode_boot =
gm20b_gr_falcon_load_ctxsw_ucode_boot,
.load_ctxsw_ucode =
nvgpu_gr_falcon_load_ctxsw_ucode,
},
},
.fb = {
@@ -1257,7 +1273,8 @@ int gv11b_init_hal(struct gk20a *g)
/* priv security dependent ops */
if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
gops->gr.falcon.load_ctxsw_ucode =
nvgpu_gr_falcon_load_secure_ctxsw_ucode;
} else {
/* non-secure boot */
gops->pmu.pmu_nsbootstrap = gv11b_pmu_bootstrap;
@@ -1267,7 +1284,6 @@ int gv11b_init_hal(struct gk20a *g)
gops->pmu.load_lsfalcon_ucode = NULL;
gops->pmu.init_wpr_region = NULL;
gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
}
nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);

@@ -21,6 +21,7 @@
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/io.h>
#include <nvgpu/debug.h>
@@ -28,6 +29,402 @@
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
#define FECS_ARB_CMD_TIMEOUT_MAX_US 40U
#define FECS_ARB_CMD_TIMEOUT_DEFAULT_US 2U
void gm20b_gr_falcon_load_gpccs_dmem(struct gk20a *g,
const u32 *ucode_u32_data, u32 ucode_u32_size)
{
u32 i, checksum;
/* enable access for gpccs dmem */
nvgpu_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
gr_gpccs_dmemc_blk_f(0) |
gr_gpccs_dmemc_aincw_f(1)));
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
nvgpu_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
checksum += ucode_u32_data[i];
}
nvgpu_log_info(g, "gpccs dmem checksum: 0x%x", checksum);
}
void gm20b_gr_falcon_load_fecs_dmem(struct gk20a *g,
const u32 *ucode_u32_data, u32 ucode_u32_size)
{
u32 i, checksum;
/* set access for fecs dmem */
nvgpu_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
gr_fecs_dmemc_blk_f(0) |
gr_fecs_dmemc_aincw_f(1)));
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
nvgpu_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
checksum += ucode_u32_data[i];
}
nvgpu_log_info(g, "fecs dmem checksum: 0x%x", checksum);
}
void gm20b_gr_falcon_load_gpccs_imem(struct gk20a *g,
const u32 *ucode_u32_data, u32 ucode_u32_size)
{
u32 cfg, gpccs_imem_size;
u32 tag, i, pad_start, pad_end;
u32 checksum;
/* enable access for gpccs imem */
nvgpu_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
gr_gpccs_imemc_blk_f(0) |
gr_gpccs_imemc_aincw_f(1)));
cfg = nvgpu_readl(g, gr_gpc0_cfg_r());
gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
/* Setup the tags for the instruction memory. */
tag = 0;
nvgpu_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
tag++;
nvgpu_writel(g, gr_gpccs_imemt_r(0),
gr_gpccs_imemt_tag_f(tag));
}
nvgpu_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
checksum += ucode_u32_data[i];
}
pad_start = i * 4U;
pad_end = pad_start + (256U - pad_start % 256U) + 256U;
for (i = pad_start;
(i < gpccs_imem_size * 256U) && (i < pad_end); i += 4U) {
if ((i != 0U) && ((i % 256U) == 0U)) {
tag++;
nvgpu_writel(g, gr_gpccs_imemt_r(0),
gr_gpccs_imemt_tag_f(tag));
}
nvgpu_writel(g, gr_gpccs_imemd_r(0), 0);
}
nvgpu_log_info(g, "gpccs imem checksum: 0x%x", checksum);
}
void gm20b_gr_falcon_load_fecs_imem(struct gk20a *g,
const u32 *ucode_u32_data, u32 ucode_u32_size)
{
u32 cfg, fecs_imem_size;
u32 tag, i, pad_start, pad_end;
u32 checksum;
/* set access for fecs imem */
nvgpu_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
gr_fecs_imemc_blk_f(0) |
gr_fecs_imemc_aincw_f(1)));
cfg = nvgpu_readl(g, gr_fecs_cfg_r());
fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
/* Setup the tags for the instruction memory. */
tag = 0;
nvgpu_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) {
tag++;
nvgpu_writel(g, gr_fecs_imemt_r(0),
gr_fecs_imemt_tag_f(tag));
}
nvgpu_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
checksum += ucode_u32_data[i];
}
pad_start = i * 4U;
pad_end = pad_start + (256U - pad_start % 256U) + 256U;
for (i = pad_start;
(i < fecs_imem_size * 256U) && i < pad_end;
i += 4U) {
if ((i != 0U) && ((i % 256U) == 0U)) {
tag++;
nvgpu_writel(g, gr_fecs_imemt_r(0),
gr_fecs_imemt_tag_f(tag));
}
nvgpu_writel(g, gr_fecs_imemd_r(0), 0);
}
nvgpu_log_info(g, "fecs imem checksum: 0x%x", checksum);
}
u32 gm20b_gr_falcon_get_gpccs_start_reg_offset(void)
{
return (gr_gpcs_gpccs_falcon_hwcfg_r() - gr_fecs_falcon_hwcfg_r());
}
void gm20b_gr_falcon_configure_fmodel(struct gk20a *g)
{
nvgpu_log_fn(g, " ");
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_r(7),
gr_fecs_ctxsw_mailbox_value_f(0xc0de7777U));
nvgpu_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777U));
}
void gm20b_gr_falcon_start_ucode(struct gk20a *g)
{
nvgpu_log_fn(g, " ");
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U),
gr_fecs_ctxsw_mailbox_clear_value_f(~U32(0U)));
nvgpu_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0U));
nvgpu_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0U));
nvgpu_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1U));
nvgpu_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1U));
nvgpu_log_fn(g, "done");
}
void gm20b_gr_falcon_start_gpccs(struct gk20a *g)
{
u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
gr_fecs_falcon_hwcfg_r();
if (nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
nvgpu_writel(g, reg_offset +
gr_fecs_cpuctl_alias_r(),
gr_gpccs_cpuctl_startcpu_f(1U));
} else {
nvgpu_writel(g, gr_gpccs_dmactl_r(),
gr_gpccs_dmactl_require_ctx_f(0U));
nvgpu_writel(g, gr_gpccs_cpuctl_r(),
gr_gpccs_cpuctl_startcpu_f(1U));
}
}
void gm20b_gr_falcon_start_fecs(struct gk20a *g)
{
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U), ~U32(0U));
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_r(1U), 1U);
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6U), 0xffffffffU);
nvgpu_writel(g, gr_fecs_cpuctl_alias_r(),
gr_fecs_cpuctl_startcpu_f(1U));
}
static void gm20b_gr_falcon_wait_for_fecs_arb_idle(struct gk20a *g)
{
int retries = FECS_ARB_CMD_TIMEOUT_MAX_US /
FECS_ARB_CMD_TIMEOUT_DEFAULT_US;
u32 val;
val = nvgpu_readl(g, gr_fecs_arb_ctx_cmd_r());
while ((gr_fecs_arb_ctx_cmd_cmd_v(val) != 0U) && (retries != 0)) {
nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT_US);
retries--;
val = nvgpu_readl(g, gr_fecs_arb_ctx_cmd_r());
}
if (retries == 0) {
nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x",
nvgpu_readl(g, gr_fecs_arb_ctx_cmd_r()));
}
retries = FECS_ARB_CMD_TIMEOUT_MAX_US /
FECS_ARB_CMD_TIMEOUT_DEFAULT_US;
while (((nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()) &
gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
(retries != 0)) {
nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT_US);
retries--;
}
if (retries == 0) {
nvgpu_err(g,
"arbiter idle timeout, fecs ctxsw status: 0x%08x",
nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()));
}
}
void gm20b_gr_falcon_bind_instblk(struct gk20a *g,
struct nvgpu_mem *mem, u64 inst_ptr)
{
u32 retries = FECS_ARB_CMD_TIMEOUT_MAX_US /
FECS_ARB_CMD_TIMEOUT_DEFAULT_US;
u32 inst_ptr_u32;
nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
while (((nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()) &
gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
(retries != 0)) {
nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT_US);
retries--;
}
if (retries == 0) {
nvgpu_err(g,
"arbiter idle timeout, status: %08x",
nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()));
}
nvgpu_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
inst_ptr >>= 12;
BUG_ON(u64_hi32(inst_ptr) != 0U);
inst_ptr_u32 = (u32)inst_ptr;
nvgpu_writel(g, gr_fecs_new_ctx_r(),
gr_fecs_new_ctx_ptr_f(inst_ptr_u32) |
nvgpu_aperture_mask(g, mem,
gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
gr_fecs_new_ctx_target_sys_mem_coh_f(),
gr_fecs_new_ctx_target_vid_mem_f()) |
gr_fecs_new_ctx_valid_m());
nvgpu_writel(g, gr_fecs_arb_ctx_ptr_r(),
gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr_u32) |
nvgpu_aperture_mask(g, mem,
gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
nvgpu_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
/* Wait for arbiter command to complete */
gm20b_gr_falcon_wait_for_fecs_arb_idle(g);
nvgpu_writel(g, gr_fecs_current_ctx_r(),
gr_fecs_current_ctx_ptr_f(inst_ptr_u32) |
gr_fecs_current_ctx_target_m() |
gr_fecs_current_ctx_valid_m());
/* Send command to arbiter to flush */
nvgpu_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s());
gm20b_gr_falcon_wait_for_fecs_arb_idle(g);
}
void gm20b_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
u32 reg_offset, u32 boot_signature, u32 addr_code32,
u32 addr_data32, u32 code_size, u32 data_size)
{
nvgpu_writel(g, reg_offset + gr_fecs_dmactl_r(),
gr_fecs_dmactl_require_ctx_f(0));
/*
* Copy falcon bootloader header into dmem at offset 0.
* Configure dmem port 0 for auto-incrementing writes starting at dmem
* offset 0.
*/
nvgpu_writel(g, reg_offset + gr_fecs_dmemc_r(0),
gr_fecs_dmemc_offs_f(0) |
gr_fecs_dmemc_blk_f(0) |
gr_fecs_dmemc_aincw_f(1));
/* Write out the actual data */
switch (boot_signature) {
case FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED:
case FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE:
case FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED:
case FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED:
case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED:
case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED:
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
/* fallthrough */
case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED:
case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED:
case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED:
case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2:
case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED:
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
addr_code32);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
code_size);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
addr_data32);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
data_size);
break;
case FALCON_UCODE_SIG_T12X_FECS_OLDER:
case FALCON_UCODE_SIG_T12X_GPCCS_OLDER:
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
addr_code32);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
code_size);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
addr_data32);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
data_size);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0),
addr_code32);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
break;
default:
nvgpu_err(g,
"unknown falcon ucode boot signature 0x%08x"
" with reg_offset 0x%08x",
boot_signature, reg_offset);
BUG();
}
}
void gm20b_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g, u32 reg_offset,
u32 boot_entry, u32 addr_load32, u32 blocks, u32 dst)
{
u32 b;
/*
* Set the base FB address for the DMA transfer. Subtract off the 256
* byte IMEM block offset such that the relative FB and IMEM offsets
* match, allowing the IMEM tags to be properly created.
*/
nvgpu_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
(addr_load32 - (dst >> 8)));
for (b = 0; b < blocks; b++) {
/* Setup destination IMEM offset */
nvgpu_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
dst + (b << 8));
/* Setup source offset (relative to BASE) */
nvgpu_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
dst + (b << 8));
nvgpu_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
gr_fecs_dmatrfcmd_imem_f(0x01) |
gr_fecs_dmatrfcmd_write_f(0x00) |
gr_fecs_dmatrfcmd_size_f(0x06) |
gr_fecs_dmatrfcmd_ctxdma_f(0));
}
/* Specify the falcon boot vector */
nvgpu_writel(g, reg_offset + gr_fecs_bootvec_r(),
gr_fecs_bootvec_vec_f(boot_entry));
/* start the falcon immediately if PRIV security is disabled*/
if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
nvgpu_writel(g, reg_offset + gr_fecs_cpuctl_r(),
gr_fecs_cpuctl_startcpu_f(0x01));
}
}
u32 gm20b_gr_falcon_fecs_base_addr(void)
{
return gr_fecs_irqsset_r();

@@ -32,5 +32,26 @@ u32 gm20b_gr_falcon_gpccs_base_addr(void);
void gm20b_gr_falcon_fecs_dump_stats(struct gk20a *g);
u32 gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id(struct gk20a *g);
u32 gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size(void);
void gm20b_gr_falcon_load_gpccs_dmem(struct gk20a *g,
const u32 *ucode_u32_data, u32 ucode_u32_size);
void gm20b_gr_falcon_load_fecs_dmem(struct gk20a *g,
const u32 *ucode_u32_data, u32 ucode_u32_size);
void gm20b_gr_falcon_load_gpccs_imem(struct gk20a *g,
const u32 *ucode_u32_data, u32 ucode_u32_size);
void gm20b_gr_falcon_load_fecs_imem(struct gk20a *g,
const u32 *ucode_u32_data, u32 ucode_u32_size);
void gm20b_gr_falcon_configure_fmodel(struct gk20a *g);
void gm20b_gr_falcon_start_ucode(struct gk20a *g);
void gm20b_gr_falcon_start_gpccs(struct gk20a *g);
void gm20b_gr_falcon_start_fecs(struct gk20a *g);
u32 gm20b_gr_falcon_get_gpccs_start_reg_offset(void);
void gm20b_gr_falcon_bind_instblk(struct gk20a *g,
struct nvgpu_mem *mem, u64 inst_ptr);
void gm20b_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
u32 reg_offset, u32 boot_signature, u32 addr_code32,
u32 addr_data32, u32 code_size, u32 data_size);
void gm20b_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g,
u32 reg_offset, u32 boot_entry, u32 addr_load32, u32 blocks,
u32 dst);
#endif /* NVGPU_GR_FALCON_GM20B_H */

@@ -285,11 +285,6 @@ struct gpu_ops {
u32 *num_ovr_perf_regs,
u32 **ovr_perf_regsr);
void (*set_hww_esr_report_mask)(struct gk20a *g);
void (*falcon_load_ucode)(struct gk20a *g,
u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments,
u32 reg_offset);
int (*load_ctxsw_ucode)(struct gk20a *g);
void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
int (*alloc_obj_ctx)(struct channel_gk20a *c,
u32 class_num, u32 flags);
@@ -574,9 +569,33 @@ struct gpu_ops {
u32 (*gpccs_base_addr)(void);
void (*dump_stats)(struct gk20a *g);
u32 (*fecs_ctxsw_mailbox_size)(void);
u32 (*get_fecs_ctx_state_store_major_rev_id)
(struct gk20a *g);
u32 (*get_fecs_ctx_state_store_major_rev_id)(
struct gk20a *g);
void (*load_gpccs_dmem)(struct gk20a *g,
const u32 *ucode_u32_data, u32 size);
void (*load_fecs_dmem)(struct gk20a *g,
const u32 *ucode_u32_data, u32 size);
void (*load_gpccs_imem)(struct gk20a *g,
const u32 *ucode_u32_data, u32 size);
void (*load_fecs_imem)(struct gk20a *g,
const u32 *ucode_u32_data, u32 size);
void (*configure_fmodel)(struct gk20a *g);
void (*start_ucode)(struct gk20a *g);
void (*start_gpccs)(struct gk20a *g);
void (*start_fecs)(struct gk20a *g);
u32 (*get_gpccs_start_reg_offset)(void);
void (*bind_instblk)(struct gk20a *g,
struct nvgpu_mem *mem, u64 inst_ptr);
void (*load_ctxsw_ucode_header)(struct gk20a *g,
u32 reg_offset, u32 boot_signature,
u32 addr_code32, u32 addr_data32,
u32 code_size, u32 data_size);
void (*load_ctxsw_ucode_boot)(struct gk20a *g,
u32 reg_offset, u32 boot_entry,
u32 addr_load32, u32 blocks, u32 dst);
int (*load_ctxsw_ucode)(struct gk20a *g);
} falcon;
#ifdef CONFIG_GK20A_CTXSW_TRACE
struct {
int (*init)(struct gk20a *g);

@@ -27,6 +27,9 @@
struct gk20a;
int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g);
int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g);
int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g);
int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g);
#endif /* NVGPU_GR_SUBCTX_H */
#endif /* NVGPU_GR_FALCON_H */

@@ -186,6 +186,7 @@
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/pmu/perf.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/hw/tu104/hw_proj_tu104.h>
#include <nvgpu/hw/tu104/hw_top_tu104.h>
@@ -410,8 +411,6 @@ static const struct gpu_ops tu104_ops = {
.get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs,
.get_sm_dsm_perf_ctrl_regs = gr_tu104_get_sm_dsm_perf_ctrl_regs,
.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask,
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.alloc_obj_ctx = gk20a_alloc_obj_ctx,
.is_tpc_addr = gr_gm20b_is_tpc_addr,
@@ -743,6 +742,23 @@ static const struct gpu_ops tu104_ops = {
gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size,
.get_fecs_ctx_state_store_major_rev_id =
gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id,
.load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
.load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
.load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
.load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
.configure_fmodel = gm20b_gr_falcon_configure_fmodel,
.start_ucode = gm20b_gr_falcon_start_ucode,
.start_gpccs = gm20b_gr_falcon_start_gpccs,
.start_fecs = gm20b_gr_falcon_start_fecs,
.get_gpccs_start_reg_offset =
gm20b_gr_falcon_get_gpccs_start_reg_offset,
.bind_instblk = gm20b_gr_falcon_bind_instblk,
.load_ctxsw_ucode_header =
gm20b_gr_falcon_load_ctxsw_ucode_header,
.load_ctxsw_ucode_boot =
gm20b_gr_falcon_load_ctxsw_ucode_boot,
.load_ctxsw_ucode =
nvgpu_gr_falcon_load_secure_ctxsw_ucode,
},
},
.fb = {
@@ -1465,7 +1481,8 @@ int tu104_init_hal(struct gk20a *g)
gops->cbc.ctrl = NULL;
gops->cbc.alloc_comptags = NULL;
gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
gops->gr.falcon.load_ctxsw_ucode =
nvgpu_gr_falcon_load_ctxsw_ucode;
/* Disable pmu pstate, as there is no pmu support */
nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);