mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: Add gm20b fecs/gpccs bootloader support
Add support for booting FECS and GPCCS via the faster bootloader method. We leave this disabled until the bootloader binaries are checked in. Change-Id: I39df5d116f7a33486407518c743638b01923970d Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/413005
This commit is contained in:
committed by
Dan Willemsen
parent
d78dca61e0
commit
48f0b407f9
@@ -27,6 +27,7 @@ struct fifo_gk20a;
|
||||
struct channel_gk20a;
|
||||
struct gr_gk20a;
|
||||
struct sim_gk20a;
|
||||
struct gk20a_ctxsw_ucode_segments;
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/spinlock.h>
|
||||
@@ -121,6 +122,10 @@ struct gpu_ops {
|
||||
void (*set_hww_esr_report_mask)(struct gk20a *g);
|
||||
int (*setup_alpha_beta_tables)(struct gk20a *g,
|
||||
struct gr_gk20a *gr);
|
||||
int (*falcon_load_ucode)(struct gk20a *g,
|
||||
u64 addr_base,
|
||||
struct gk20a_ctxsw_ucode_segments *segments,
|
||||
u32 reg_offset);
|
||||
} gr;
|
||||
const char *name;
|
||||
struct {
|
||||
|
||||
@@ -2141,10 +2141,10 @@ static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
|
||||
|
||||
gr_gk20a_load_falcon_bind_instblk(g);
|
||||
|
||||
gr_gk20a_load_ctxsw_ucode_segments(g, addr_base,
|
||||
g->ops.gr.falcon_load_ucode(g, addr_base,
|
||||
&g->ctxsw_ucode_info.fecs, 0);
|
||||
|
||||
gr_gk20a_load_ctxsw_ucode_segments(g, addr_base,
|
||||
g->ops.gr.falcon_load_ucode(g, addr_base,
|
||||
&g->ctxsw_ucode_info.gpccs,
|
||||
gr_gpcs_gpccs_falcon_hwcfg_r() -
|
||||
gr_fecs_falcon_hwcfg_r());
|
||||
@@ -6845,4 +6845,5 @@ void gk20a_init_gr(struct gpu_ops *gops)
|
||||
gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep;
|
||||
gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask;
|
||||
gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables;
|
||||
gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments;
|
||||
}
|
||||
|
||||
@@ -570,6 +570,87 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Boot one context-switch falcon through its bootloader.
 *
 * @g:          GPU instance whose registers are programmed.
 * @addr_base:  base address that the segment offsets are added to.
 * @segments:   boot/code/data segment layout of the ucode image.
 * @reg_offset: added to every gr_fecs_* register address, so the same
 *              sequence can target a falcon other than FECS.
 *
 * The sequence is: clear the DMA require_ctx setting, write the bootloader
 * header into DMEM through port 0, DMA the bootloader into IMEM, set the
 * boot vector and start the CPU. Register writes are issued strictly in
 * that order.
 *
 * Always returns 0.
 */
static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
	/* Segment addresses, expressed as (byte address >> 8). */
	const u32 code_base =
		u64_lo32((addr_base + segments->code.offset) >> 8);
	const u32 data_base =
		u64_lo32((addr_base + segments->data.offset) >> 8);
	const u32 boot_base =
		u64_lo32((addr_base + segments->boot.offset) >> 8);
	/* IMEM offset the bootloader is placed at. */
	const u32 imem_dst = segments->boot_imem_offset;
	/* Bootloader size rounded up to whole 256-byte steps. */
	const u32 nr_blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8;
	/* Bootloader header words, in the order they are written to DMEM. */
	const u32 bl_header[] = {
		0, 0, 0, 0, 4,
		code_base, 0, segments->code.size,
		0, 0, 0,
		data_base, segments->data.size,
	};
	u32 i;

	gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
			gr_fecs_dmactl_require_ctx_f(0));

	/*
	 * Copy falcon bootloader header into dmem at offset 0: point dmem
	 * port 0 at offset 0 with auto-increment on write, then stream the
	 * header words through the data register.
	 */
	gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
			gr_fecs_dmemc_offs_f(0) |
			gr_fecs_dmemc_blk_f(0) |
			gr_fecs_dmemc_aincw_f(1));

	for (i = 0; i < sizeof(bl_header) / sizeof(bl_header[0]); i++)
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				bl_header[i]);

	/*
	 * Set the base FB address for the DMA transfer. Subtract off the 256
	 * byte IMEM block offset such that the relative FB and IMEM offsets
	 * match, allowing the IMEM tags to be properly created.
	 */
	gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
			(boot_base - (imem_dst >> 8)));

	for (i = 0; i < nr_blocks; i++) {
		/* Same offset is used on the IMEM side and the FB side. */
		const u32 xfer_offs = imem_dst + (i << 8);

		/* Setup destination IMEM offset */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
				xfer_offs);

		/* Setup source offset (relative to BASE) */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
				xfer_offs);

		/* Issue the transfer command for this step */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
				gr_fecs_dmatrfcmd_imem_f(0x01) |
				gr_fecs_dmatrfcmd_write_f(0x00) |
				gr_fecs_dmatrfcmd_size_f(0x06) |
				gr_fecs_dmatrfcmd_ctxdma_f(0));
	}

	/* Specify the falcon boot vector */
	gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
			gr_fecs_bootvec_vec_f(segments->boot_entry));

	/* Write to CPUCTL to start the falcon */
	gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
			gr_fecs_cpuctl_startcpu_f(0x01));

	return 0;
}
|
||||
|
||||
void gm20b_init_gr(struct gpu_ops *gops)
|
||||
{
|
||||
gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
|
||||
@@ -590,4 +671,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
|
||||
gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs;
|
||||
gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep;
|
||||
gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask;
|
||||
gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user