gpu: nvgpu: Support GPUs with no physical mode

Support GPUs which cannot choose between SMMU and physical
addressing.

Change-Id: If3256fa1bc795a84d039ad3aa63ebdccf5cc0afb
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1120469
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Author: Terje Bergstrom <tbergstrom@nvidia.com>
Date:   2016-03-31 13:33:02 -07:00
Commit: 9b5427da37
Parent: 2adf9164d9

12 changed files with 59 additions and 42 deletions
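
The heart of the change is the new helper gk20a_mm_inst_block_addr(), which replaces direct gk20a_mem_phys() calls at every site that programs an instance-block pointer into hardware. A condensed sketch of its behavior (rewritten here with early returns; the full version in the mm_gk20a.c hunk below goes through a local variable):

	/* Pick the address the GPU should use for an instance block. */
	u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct mem_desc *inst_block)
	{
		/* GPUs that can choose their addressing mode keep using
		 * the physical address, exactly as before this change. */
		if (g->mm.has_physical_mode)
			return gk20a_mem_phys(inst_block);

		/* GPUs with no physical mode must use the SMMU (IOMMU)
		 * address of the buffer instead. */
		return gk20a_mm_smmu_vaddr_translate(g,
				sg_dma_address(inst_block->sgt->sgl));
	}

has_physical_mode is initialized to true in gk20a_probe(), so existing chips keep their current behavior; a platform that cannot select physical addressing is expected to clear the flag.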

drivers/gpu/nvgpu/gk20a/channel_gk20a.c

@@ -333,33 +333,32 @@ static int channel_gk20a_setup_userd(struct channel_gk20a *c)
 	return 0;
 }
 
-static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
+static void channel_gk20a_bind(struct channel_gk20a *c)
 {
-	struct gk20a *g = ch_gk20a->g;
+	struct gk20a *g = c->g;
 	struct fifo_gk20a *f = &g->fifo;
 	struct fifo_engine_info_gk20a *engine_info =
 		f->engine_info + ENGINE_GR_GK20A;
 
-	u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block)
+	u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block)
 		>> ram_in_base_shift_v();
 
 	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
-		ch_gk20a->hw_chid, inst_ptr);
+		c->hw_chid, inst_ptr);
 
-	ch_gk20a->bound = true;
+	c->bound = true;
 
-	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
-		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
+	gk20a_writel(g, ccsr_channel_r(c->hw_chid),
+		(gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
 		~ccsr_channel_runlist_f(~0)) |
 		ccsr_channel_runlist_f(engine_info->runlist_id));
 
-	gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
+	gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
 		ccsr_channel_inst_ptr_f(inst_ptr) |
 		ccsr_channel_inst_target_vid_mem_f() |
 		ccsr_channel_inst_bind_true_f());
 
-	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
-		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
+	gk20a_writel(g, ccsr_channel_r(c->hw_chid),
+		(gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
 		~ccsr_channel_enable_set_f(~0)) |
 		ccsr_channel_enable_set_true_f());
 }
@@ -402,7 +401,7 @@ int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 		return err;
 
 	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
-		ch->hw_chid, (u64)gk20a_mem_phys(&ch->inst_block));
+		ch->hw_chid, gk20a_mm_inst_block_addr(g, &ch->inst_block));
 
 	gk20a_dbg_fn("done");
 	return 0;

drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c

@@ -281,7 +281,7 @@ static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size)
 	gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);
 
 	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mem_phys(&g->mm.hwpm.inst_block) >> 12;
+	inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
 
 	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
 	 * should be written last */

drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c

@@ -1046,7 +1046,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
 
 	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mem_phys(&g->mm.hwpm.inst_block) >> 12;
+	inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
 
 	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
 	 * should be written last */

drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c

@@ -624,12 +624,13 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
 		"hw_chid=%d context_ptr=%x inst_block=%llx",
-		ch->hw_chid, context_ptr, gk20a_mem_phys(&ch->inst_block));
+		ch->hw_chid, context_ptr,
+		gk20a_mm_inst_block_addr(g, &ch->inst_block));
 
 	if (!trace)
 		return -ENOMEM;
 
-	pa = gk20a_mem_phys(&trace->trace_buf);
+	pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf);
 	if (!pa)
 		return -ENOMEM;

drivers/gpu/nvgpu/gk20a/fifo_gk20a.c

@@ -654,17 +654,21 @@ int gk20a_init_fifo_support(struct gk20a *g)
 static struct channel_gk20a *
 channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
 {
+	struct gk20a *g = f->g;
 	int ci;
+
 	if (unlikely(!f->channel))
 		return NULL;
 	for (ci = 0; ci < f->num_channels; ci++) {
-		struct channel_gk20a *ch = gk20a_channel_get(&f->channel[ci]);
+		struct channel_gk20a *ch;
+		u64 ch_inst_ptr;
+
+		ch = gk20a_channel_get(&f->channel[ci]);
 		/* only alive channels are searched */
 		if (!ch)
 			continue;
 
-		if (ch->inst_block.cpu_va &&
-			(inst_ptr == gk20a_mem_phys(&ch->inst_block)))
+		ch_inst_ptr = gk20a_mm_inst_block_addr(g, &ch->inst_block);
+		if (ch->inst_block.cpu_va && inst_ptr == ch_inst_ptr)
 			return ch;
 
 		gk20a_channel_put(ch);
@@ -1087,10 +1091,10 @@ static bool gk20a_fifo_handle_mmu_fault(
 				ch->hw_chid);
 		}
 	} else if (f.inst_ptr ==
-			gk20a_mem_phys(&g->mm.bar1.inst_block)) {
+			gk20a_mm_inst_block_addr(g, &g->mm.bar1.inst_block)) {
 		gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
 	} else if (f.inst_ptr ==
-			gk20a_mem_phys(&g->mm.pmu.inst_block)) {
+			gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)) {
 		gk20a_err(dev_from_gk20a(g), "mmu fault from pmu");
 	} else
 		gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault");

drivers/gpu/nvgpu/gk20a/gk20a.c

@@ -1542,6 +1542,7 @@ static int gk20a_probe(struct platform_device *dev)
 	gk20a->mm.ltc_enabled_debug = true;
 	gk20a->mm.bypass_smmu = platform->bypass_smmu;
 	gk20a->mm.disable_bigpage = platform->disable_bigpage;
+	gk20a->mm.has_physical_mode = true;
 	gk20a->debugfs_ltc_enabled =
 			debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
 				platform->debugfs,

drivers/gpu/nvgpu/gk20a/gr_gk20a.c

@@ -729,7 +729,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
 static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
 					struct channel_gk20a *c)
 {
-	u32 inst_base_ptr = u64_lo32(gk20a_mem_phys(&c->inst_block)
+	u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
 				     >> ram_in_base_shift_v());
 	u32 ret;
@@ -1408,7 +1408,7 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
 	int ret;
 	u32 inst_base_ptr =
-		u64_lo32(gk20a_mem_phys(&c->inst_block)
+		u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
 			 >> ram_in_base_shift_v());
@@ -1875,7 +1875,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	if (tegra_platform_is_linsim()) {
 		u32 inst_base_ptr =
-			u64_lo32(gk20a_mem_phys(&c->inst_block)
+			u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
 				 >> ram_in_base_shift_v());
 
 		ret = gr_gk20a_submit_fecs_method_op(g,
@@ -2103,7 +2103,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
 		gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
 
-	inst_ptr = gk20a_mem_phys(&ucode_info->inst_blk_desc);
+	inst_ptr = gk20a_mm_inst_block_addr(g, &ucode_info->inst_blk_desc);
 	gk20a_writel(g, gr_fecs_new_ctx_r(),
 		     gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
 		     gr_fecs_new_ctx_target_m() |
@@ -4712,7 +4712,7 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
 	err = gr_gk20a_fecs_set_reglist_bind_inst(g,
-			gk20a_mem_phys(&mm->pmu.inst_block));
+			gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block));
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"fail to bind pmu inst to gr");
@@ -4991,7 +4991,7 @@ int gk20a_gr_reset(struct gk20a *g)
 	}
 
 	err = gr_gk20a_fecs_set_reglist_bind_inst(g,
-		gk20a_mem_phys(&g->mm.pmu.inst_block));
+		gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block));
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"fail to bind pmu inst to gr");
@@ -5372,7 +5372,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
 		if (!gk20a_channel_get(ch))
 			continue;
 
-		if ((u32)(gk20a_mem_phys(&ch->inst_block) >>
+		if ((u32)(gk20a_mm_inst_block_addr(g, &ch->inst_block) >>
 					ram_in_base_shift_v()) ==
 			gr_fecs_current_ctx_ptr_v(curr_ctx)) {
 			tsgid = ch->tsgid;

drivers/gpu/nvgpu/gk20a/mm_gk20a.c

@@ -482,7 +482,7 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct mem_desc *inst_block = &mm->bar1.inst_block;
-	phys_addr_t inst_pa = gk20a_mem_phys(inst_block);
+	u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block);
 	int err;
 
 	gk20a_dbg_fn("");
@@ -2249,7 +2249,7 @@ void gk20a_free_sgtable(struct sg_table **sgt)
 
 u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova)
 {
-	if (!device_is_iommuable(dev_from_gk20a(g)))
+	if (!device_is_iommuable(dev_from_gk20a(g)) || !g->mm.has_physical_mode)
 		return iova;
 	else
 		return iova | 1ULL << g->ops.mm.get_physical_addr_bits(g);
@@ -3382,6 +3382,17 @@ void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block)
 	gk20a_gmmu_free(g, inst_block);
 }
 
+u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct mem_desc *inst_block)
+{
+	u64 addr;
+
+	if (g->mm.has_physical_mode)
+		addr = gk20a_mem_phys(inst_block);
+	else
+		addr = gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(inst_block->sgt->sgl));
+
+	return addr;
+}
+
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 {
 	int err;
@@ -3484,11 +3495,10 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.sgt->sgl, 0);
-	phys_addr_t inst_pa = gk20a_mem_phys(inst_block);
 	void *inst_ptr = inst_block->cpu_va;
 
 	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
-		(u64)inst_pa, inst_ptr);
+		gk20a_mm_inst_block_addr(g, inst_block), inst_ptr);
 
 	gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr);

drivers/gpu/nvgpu/gk20a/mm_gk20a.h

@@ -346,6 +346,7 @@ struct mm_gk20a {
 #endif
 	u32 bypass_smmu;
 	u32 disable_bigpage;
+	bool has_physical_mode;
 };
 
 int gk20a_mm_init(struct mm_gk20a *mm);
@@ -420,6 +421,7 @@ int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block);
 void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block);
 void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
 		u32 big_page_size);
+u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct mem_desc *mem);
 void gk20a_mm_dump_vm(struct vm_gk20a *vm,
 		u64 va_begin, u64 va_end, char *label);

drivers/gpu/nvgpu/gk20a/pmu_gk20a.c

@@ -1827,7 +1827,7 @@ int pmu_bootstrap(struct pmu_gk20a *pmu)
 		pwr_falcon_itfen_ctxen_enable_f());
 	gk20a_writel(g, pwr_pmu_new_instblk_r(),
 		pwr_pmu_new_instblk_ptr_f(
-			gk20a_mem_phys(&mm->pmu.inst_block) >> 12) |
+			gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 		pwr_pmu_new_instblk_valid_f(1) |
 		pwr_pmu_new_instblk_target_sys_coh_f());

drivers/gpu/nvgpu/gm20b/acr_gm20b.c

@@ -1169,7 +1169,7 @@ static int bl_bootstrap(struct pmu_gk20a *pmu,
 		pwr_falcon_itfen_ctxen_enable_f());
 	gk20a_writel(g, pwr_pmu_new_instblk_r(),
 		pwr_pmu_new_instblk_ptr_f(
-			gk20a_mem_phys(&mm->pmu.inst_block) >> 12) |
+			gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 		pwr_pmu_new_instblk_valid_f(1) |
 		pwr_pmu_new_instblk_target_sys_coh_f());

drivers/gpu/nvgpu/gm20b/channel_gm20b.c

@@ -20,25 +20,25 @@
 #include "hw_ram_gm20b.h"
 #include "hw_fifo_gm20b.h"
 
-static void channel_gm20b_bind(struct channel_gk20a *ch_gk20a)
+static void channel_gm20b_bind(struct channel_gk20a *c)
 {
-	struct gk20a *g = ch_gk20a->g;
+	struct gk20a *g = c->g;
 
-	u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block)
+	u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block)
 		>> ram_in_base_shift_v();
 
 	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
-		ch_gk20a->hw_chid, inst_ptr);
+		c->hw_chid, inst_ptr);
 
-	ch_gk20a->bound = true;
+	c->bound = true;
 
-	gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
+	gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
 		ccsr_channel_inst_ptr_f(inst_ptr) |
 		ccsr_channel_inst_target_vid_mem_f() |
 		ccsr_channel_inst_bind_true_f());
 
-	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
-		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
+	gk20a_writel(g, ccsr_channel_r(c->hw_chid),
+		(gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
 		~ccsr_channel_enable_set_f(~0)) |
 		ccsr_channel_enable_set_true_f());
 }