gpu: nvgpu: Add gm20b fecs/gpccs bootloader support

Add support for booting FECS and GPCCS via a faster bootloader method.
We leave this disabled until the bootloader binaries are checked in.

Change-Id: I39df5d116f7a33486407518c743638b01923970d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/413005
This commit is contained in:
Terje Bergstrom
2014-05-22 09:53:51 +03:00
committed by Dan Willemsen
parent d78dca61e0
commit 48f0b407f9
3 changed files with 90 additions and 2 deletions

View File

@@ -27,6 +27,7 @@ struct fifo_gk20a;
struct channel_gk20a;
struct gr_gk20a;
struct sim_gk20a;
struct gk20a_ctxsw_ucode_segments;
#include <linux/sched.h>
#include <linux/spinlock.h>
@@ -121,6 +122,10 @@ struct gpu_ops {
void (*set_hww_esr_report_mask)(struct gk20a *g);
int (*setup_alpha_beta_tables)(struct gk20a *g,
struct gr_gk20a *gr);
int (*falcon_load_ucode)(struct gk20a *g,
u64 addr_base,
struct gk20a_ctxsw_ucode_segments *segments,
u32 reg_offset);
} gr;
const char *name;
struct {

View File

@@ -2141,10 +2141,10 @@ static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
gr_gk20a_load_falcon_bind_instblk(g);
gr_gk20a_load_ctxsw_ucode_segments(g, addr_base,
g->ops.gr.falcon_load_ucode(g, addr_base,
&g->ctxsw_ucode_info.fecs, 0);
gr_gk20a_load_ctxsw_ucode_segments(g, addr_base,
g->ops.gr.falcon_load_ucode(g, addr_base,
&g->ctxsw_ucode_info.gpccs,
gr_gpcs_gpccs_falcon_hwcfg_r() -
gr_fecs_falcon_hwcfg_r());
@@ -6845,4 +6845,5 @@ void gk20a_init_gr(struct gpu_ops *gops)
gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep;
gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask;
gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables;
gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments;
}

View File

@@ -570,6 +570,87 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
return 0;
}
/*
 * Stage the ctxsw bootloader for one falcon and start it.
 *
 * @g:          device whose registers are written.
 * @addr_base:  base address that the segment offsets are added to.
 * @segments:   boot/code/data layout of the ucode image.
 * @reg_offset: added to every gr_fecs_* register offset used here, so the
 *              same sequence can be aimed at another falcon's registers.
 *
 * The sequence is: clear require_ctx in DMACTL, write a 13-word header
 * through DMEM port 0, DMA the boot segment into IMEM in 256-byte steps,
 * program the boot vector and finally set STARTCPU.
 *
 * Always returns 0.
 */
static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
	/* Segment addresses with the low 8 bits shifted out (256-byte units). */
	const u32 code_addr256 =
		u64_lo32((addr_base + segments->code.offset) >> 8);
	const u32 data_addr256 =
		u64_lo32((addr_base + segments->data.offset) >> 8);
	const u32 boot_addr256 =
		u64_lo32((addr_base + segments->boot.offset) >> 8);

	/*
	 * Header words, in the exact order they are pushed through the
	 * auto-incrementing DMEM port.
	 * NOTE(review): the meaning of the constant words (the zeros and the
	 * 4) is defined by the bootloader binary, not by this file - confirm
	 * against the bootloader's header layout before changing them.
	 */
	const u32 bl_header[] = {
		0,
		0,
		0,
		0,
		4,
		code_addr256,
		0,
		segments->code.size,
		0,
		0,
		0,
		data_addr256,
		segments->data.size,
	};

	/* Boot segment size rounded up to whole 256-byte blocks. */
	const u32 nr_blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8;
	const u32 imem_base = segments->boot_imem_offset;
	u32 xfer_offs = imem_base;
	u32 i;

	gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
			gr_fecs_dmactl_require_ctx_f(0));

	/*
	 * Point DMEM port 0 at offset 0, block 0, with auto-increment on
	 * write, then stream the header through the data register.
	 */
	gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
			gr_fecs_dmemc_offs_f(0) |
			gr_fecs_dmemc_blk_f(0) |
			gr_fecs_dmemc_aincw_f(1));

	for (i = 0; i < sizeof(bl_header) / sizeof(bl_header[0]); i++)
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
				bl_header[i]);

	/*
	 * The transfer base is biased downwards by the IMEM destination
	 * (in 256-byte units) so that one offset value can be used for both
	 * the IMEM side and the FB side of every transfer below.
	 */
	gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
			(boot_addr256 - (imem_base >> 8)));

	/* One DMA command per 256-byte step of the boot segment. */
	for (i = 0; i < nr_blocks; i++, xfer_offs += 0x100) {
		/* Destination offset in IMEM */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
				xfer_offs);

		/* Source offset, relative to the transfer base */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
				xfer_offs);

		/*
		 * NOTE(review): size code 0x06 presumably selects a
		 * 256-byte transfer, matching the loop step - confirm
		 * against the falcon DMATRFCMD definition.
		 */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
				gr_fecs_dmatrfcmd_imem_f(0x01) |
				gr_fecs_dmatrfcmd_write_f(0x00) |
				gr_fecs_dmatrfcmd_size_f(0x06) |
				gr_fecs_dmatrfcmd_ctxdma_f(0));
	}

	/* Program the boot vector with the bootloader entry point */
	gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
			gr_fecs_bootvec_vec_f(segments->boot_entry));

	/* Set STARTCPU in CPUCTL to start the falcon */
	gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
			gr_fecs_cpuctl_startcpu_f(0x01));

	return 0;
}
void gm20b_init_gr(struct gpu_ops *gops)
{
gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -590,4 +671,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs;
gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep;
gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask;
gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments;
}