gpu: nvgpu: initial support for vidmem apertures

add gk20a_aperture_mask() for memory target selection now that buffers
can actually be allocated from vidmem, and use it in all cases that have
a mem_desc available.

Jira DNVGPU-76

Change-Id: I4353cdc6e1e79488f0875581cfaf2a5cfb8c976a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1169306
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Konsta Holtta
2016-06-17 15:51:02 +03:00
committed by Terje Bergstrom
parent cd5a1dc315
commit e12c5c8594
9 changed files with 105 additions and 82 deletions

View File

@@ -143,8 +143,8 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c)
gk20a_mem_wr32(g, &c->inst_block, gk20a_mem_wr32(g, &c->inst_block,
ram_in_ramfc_w() + ram_fc_userd_w(), ram_in_ramfc_w() + ram_fc_userd_w(),
(g->mm.vidmem_is_vidmem ? gk20a_aperture_mask(g, &g->fifo.userd,
pbdma_userd_target_sys_mem_ncoh_f() : pbdma_userd_target_sys_mem_ncoh_f(),
pbdma_userd_target_vid_mem_f()) | pbdma_userd_target_vid_mem_f()) |
pbdma_userd_addr_f(addr_lo)); pbdma_userd_addr_f(addr_lo));
@@ -360,8 +360,8 @@ static void channel_gk20a_bind(struct channel_gk20a *c)
gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid), gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
ccsr_channel_inst_ptr_f(inst_ptr) | ccsr_channel_inst_ptr_f(inst_ptr) |
(g->mm.vidmem_is_vidmem ? gk20a_aperture_mask(g, &c->inst_block,
ccsr_channel_inst_target_sys_mem_ncoh_f() : ccsr_channel_inst_target_sys_mem_ncoh_f(),
ccsr_channel_inst_target_vid_mem_f()) | ccsr_channel_inst_target_vid_mem_f()) |
ccsr_channel_inst_bind_true_f()); ccsr_channel_inst_bind_true_f());

View File

@@ -2751,8 +2751,8 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
if (count != 0) { if (count != 0) {
gk20a_writel(g, fifo_runlist_base_r(), gk20a_writel(g, fifo_runlist_base_r(),
fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
(g->mm.vidmem_is_vidmem ? gk20a_aperture_mask(g, &runlist->mem[new_buf],
fifo_runlist_base_target_sys_mem_ncoh_f() : fifo_runlist_base_target_sys_mem_ncoh_f(),
fifo_runlist_base_target_vid_mem_f())); fifo_runlist_base_target_vid_mem_f()));
} }

View File

@@ -521,11 +521,11 @@ struct gpu_ops {
void (*remove_bar2_vm)(struct gk20a *g); void (*remove_bar2_vm)(struct gk20a *g);
const struct gk20a_mmu_level * const struct gk20a_mmu_level *
(*get_mmu_levels)(struct gk20a *g, u32 big_page_size); (*get_mmu_levels)(struct gk20a *g, u32 big_page_size);
void (*init_pdb)(struct gk20a *g, struct mem_desc *mem, void (*init_pdb)(struct gk20a *g, struct mem_desc *inst_block,
u64 pdb_addr); struct vm_gk20a *vm);
u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl, u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl,
u32 flags); u32 flags);
int (*bar1_bind)(struct gk20a *g, u64 bar1_iova); int (*bar1_bind)(struct gk20a *g, struct mem_desc *bar1_inst);
size_t (*get_vidmem_size)(struct gk20a *g); size_t (*get_vidmem_size)(struct gk20a *g);
} mm; } mm;
struct { struct {

View File

@@ -674,11 +674,24 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
} }
} }
static u32 fecs_current_ctx_data(struct gk20a *g, struct mem_desc *inst_block)
{
u32 ptr = u64_lo32(gk20a_mm_inst_block_addr(g, inst_block)
>> ram_in_base_shift_v());
u32 aperture = gk20a_aperture_mask(g, inst_block,
gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
gr_fecs_current_ctx_target_vid_mem_f());
return gr_fecs_current_ctx_ptr_f(ptr) | aperture |
gr_fecs_current_ctx_valid_f(1);
}
static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
struct channel_gk20a *c) struct channel_gk20a *c)
{ {
u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
>> ram_in_base_shift_v()); >> ram_in_base_shift_v());
u32 data = fecs_current_ctx_data(g, &c->inst_block);
u32 ret; u32 ret;
gk20a_dbg_info("bind channel %d inst ptr 0x%08x", gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
@@ -687,11 +700,7 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
ret = gr_gk20a_submit_fecs_method_op(g, ret = gr_gk20a_submit_fecs_method_op(g,
(struct fecs_method_op_gk20a) { (struct fecs_method_op_gk20a) {
.method.addr = gr_fecs_method_push_adr_bind_pointer_v(), .method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
.method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) | .method.data = data,
(g->mm.vidmem_is_vidmem ?
gr_fecs_current_ctx_target_sys_mem_ncoh_f() :
gr_fecs_current_ctx_target_vid_mem_f()) |
gr_fecs_current_ctx_valid_f(1)),
.mailbox = { .id = 0, .data = 0, .mailbox = { .id = 0, .data = 0,
.clr = 0x30, .clr = 0x30,
.ret = NULL, .ret = NULL,
@@ -1392,21 +1401,12 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
struct gk20a *g = c->g; struct gk20a *g = c->g;
int ret; int ret;
u32 inst_base_ptr =
u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
>> ram_in_base_shift_v());
gk20a_dbg_fn(""); gk20a_dbg_fn("");
ret = gr_gk20a_submit_fecs_method_op(g, ret = gr_gk20a_submit_fecs_method_op(g,
(struct fecs_method_op_gk20a) { (struct fecs_method_op_gk20a) {
.method.addr = save_type, .method.addr = save_type,
.method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) | .method.data = fecs_current_ctx_data(g, &c->inst_block),
(g->mm.vidmem_is_vidmem ?
gr_fecs_current_ctx_target_sys_mem_ncoh_f() :
gr_fecs_current_ctx_target_vid_mem_f()) |
gr_fecs_current_ctx_valid_f(1)),
.mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL, .mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL,
.ok = 1, .fail = 2, .ok = 1, .fail = 2,
}, },
@@ -1987,18 +1987,11 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
gk20a_mem_end(g, mem); gk20a_mem_end(g, mem);
if (tegra_platform_is_linsim()) { if (tegra_platform_is_linsim()) {
u32 inst_base_ptr = u32 mdata = fecs_current_ctx_data(g, &c->inst_block);
u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
>> ram_in_base_shift_v());
ret = gr_gk20a_submit_fecs_method_op(g, ret = gr_gk20a_submit_fecs_method_op(g,
(struct fecs_method_op_gk20a) { (struct fecs_method_op_gk20a) {
.method.data = .method.data = mdata,
(gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
(g->mm.vidmem_is_vidmem ?
gr_fecs_current_ctx_target_sys_mem_ncoh_f() :
gr_fecs_current_ctx_target_vid_mem_f()) |
gr_fecs_current_ctx_valid_f(1)),
.method.addr = .method.addr =
gr_fecs_method_push_adr_restore_golden_v(), gr_fecs_method_push_adr_restore_golden_v(),
.mailbox = { .mailbox = {
@@ -4507,8 +4500,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
addr >>= fb_mmu_debug_wr_addr_alignment_v(); addr >>= fb_mmu_debug_wr_addr_alignment_v();
gk20a_writel(g, fb_mmu_debug_wr_r(), gk20a_writel(g, fb_mmu_debug_wr_r(),
(g->mm.vidmem_is_vidmem ? gk20a_aperture_mask(g, &gr->mmu_wr_mem,
fb_mmu_debug_wr_aperture_sys_mem_ncoh_f() : fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
fb_mmu_debug_wr_aperture_vid_mem_f()) | fb_mmu_debug_wr_aperture_vid_mem_f()) |
fb_mmu_debug_wr_vol_false_f() | fb_mmu_debug_wr_vol_false_f() |
fb_mmu_debug_wr_addr_f(addr)); fb_mmu_debug_wr_addr_f(addr));
@@ -4517,8 +4510,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
addr >>= fb_mmu_debug_rd_addr_alignment_v(); addr >>= fb_mmu_debug_rd_addr_alignment_v();
gk20a_writel(g, fb_mmu_debug_rd_r(), gk20a_writel(g, fb_mmu_debug_rd_r(),
(g->mm.vidmem_is_vidmem ? gk20a_aperture_mask(g, &gr->mmu_rd_mem,
fb_mmu_debug_wr_aperture_sys_mem_ncoh_f() : fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
fb_mmu_debug_rd_aperture_vid_mem_f()) | fb_mmu_debug_rd_aperture_vid_mem_f()) |
fb_mmu_debug_rd_vol_false_f() | fb_mmu_debug_rd_vol_false_f() |
fb_mmu_debug_rd_addr_f(addr)); fb_mmu_debug_rd_addr_f(addr));
@@ -4966,8 +4959,7 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
} }
err = gr_gk20a_fecs_set_reglist_bind_inst(g, err = gr_gk20a_fecs_set_reglist_bind_inst(g, &mm->pmu.inst_block);
gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block));
if (err) { if (err) {
gk20a_err(dev_from_gk20a(g), gk20a_err(dev_from_gk20a(g),
"fail to bind pmu inst to gr"); "fail to bind pmu inst to gr");
@@ -5245,8 +5237,7 @@ int gk20a_gr_reset(struct gk20a *g)
return err; return err;
} }
err = gr_gk20a_fecs_set_reglist_bind_inst(g, err = gr_gk20a_fecs_set_reglist_bind_inst(g, &g->mm.pmu.inst_block);
gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block));
if (err) { if (err) {
gk20a_err(dev_from_gk20a(g), gk20a_err(dev_from_gk20a(g),
"fail to bind pmu inst to gr"); "fail to bind pmu inst to gr");
@@ -6346,16 +6337,15 @@ int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
.mailbox.fail = 0}, false); .mailbox.fail = 0}, false);
} }
int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr) int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
struct mem_desc *inst_block)
{ {
u32 data = fecs_current_ctx_data(g, inst_block);
return gr_gk20a_submit_fecs_method_op(g, return gr_gk20a_submit_fecs_method_op(g,
(struct fecs_method_op_gk20a){ (struct fecs_method_op_gk20a){
.mailbox.id = 4, .mailbox.id = 4,
.mailbox.data = (gr_fecs_current_ctx_ptr_f(addr >> 12) | .mailbox.data = data,
gr_fecs_current_ctx_valid_f(1) |
(g->mm.vidmem_is_vidmem ?
gr_fecs_current_ctx_target_sys_mem_ncoh_f() :
gr_fecs_current_ctx_target_vid_mem_f())),
.mailbox.clr = ~0, .mailbox.clr = ~0,
.method.data = 1, .method.data = 1,
.method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(), .method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(),

View File

@@ -476,7 +476,8 @@ int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr);
/* pmu */ /* pmu */
int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size); int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr); int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g,
struct mem_desc *inst_block);
int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va); int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va);
void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine); void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine);

View File

@@ -92,10 +92,10 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
u32 hi = (u32)((addr & ~(u64)0xfffff) u32 hi = (u32)((addr & ~(u64)0xfffff)
>> bus_bar0_window_target_bar0_window_base_shift_v()); >> bus_bar0_window_target_bar0_window_base_shift_v());
u32 lo = (u32)(addr & 0xfffff); u32 lo = (u32)(addr & 0xfffff);
u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ? u32 win = gk20a_aperture_mask(g, mem,
bus_bar0_window_target_sys_mem_noncoherent_f() : bus_bar0_window_target_sys_mem_noncoherent_f(),
bus_bar0_window_target_vid_mem_f()) | bus_bar0_window_target_vid_mem_f()) |
bus_bar0_window_base_f(hi); bus_bar0_window_base_f(hi);
gk20a_dbg(gpu_dbg_mem, gk20a_dbg(gpu_dbg_mem,
"0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)", "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)",
@@ -817,8 +817,6 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
int gk20a_init_mm_setup_hw(struct gk20a *g) int gk20a_init_mm_setup_hw(struct gk20a *g)
{ {
struct mm_gk20a *mm = &g->mm; struct mm_gk20a *mm = &g->mm;
struct mem_desc *inst_block = &mm->bar1.inst_block;
u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block);
int err; int err;
gk20a_dbg_fn(""); gk20a_dbg_fn("");
@@ -832,7 +830,7 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0) g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
>> 8); >> 8);
g->ops.mm.bar1_bind(g, inst_pa); g->ops.mm.bar1_bind(g, &mm->bar1.inst_block);
if (g->ops.mm.init_bar2_mm_hw_setup) { if (g->ops.mm.init_bar2_mm_hw_setup) {
err = g->ops.mm.init_bar2_mm_hw_setup(g); err = g->ops.mm.init_bar2_mm_hw_setup(g);
@@ -847,17 +845,19 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
return 0; return 0;
} }
static int gk20a_mm_bar1_bind(struct gk20a *g, u64 bar1_iova) static int gk20a_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst)
{ {
u64 inst_pa = (u32)(bar1_iova >> bar1_instance_block_shift_gk20a()); u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a());
gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
gk20a_writel(g, bus_bar1_block_r(), gk20a_writel(g, bus_bar1_block_r(),
(g->mm.vidmem_is_vidmem ? gk20a_aperture_mask(g, bar1_inst,
bus_bar1_block_target_sys_mem_ncoh_f() : bus_bar1_block_target_sys_mem_ncoh_f(),
bus_bar1_block_target_vid_mem_f()) | bus_bar1_block_target_vid_mem_f()) |
bus_bar1_block_mode_virtual_f() | bus_bar1_block_mode_virtual_f() |
bus_bar1_block_ptr_f(inst_pa)); bus_bar1_block_ptr_f(ptr_v));
return 0; return 0;
} }
@@ -2559,6 +2559,29 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
return gk20a_gmmu_free_attr(g, 0, mem); return gk20a_gmmu_free_attr(g, 0, mem);
} }
/*
 * Select a HW target field value based on where a buffer lives.
 *
 * @g: gpu context (consulted for mm.vidmem_is_vidmem)
 * @aperture: where the buffer was allocated
 * @sysmem_mask: field value to use for system memory targets
 * @vidmem_mask: field value to use for video memory targets
 *
 * Returns sysmem_mask or vidmem_mask as appropriate, or 0 for an
 * invalid aperture (after warning).
 */
u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture,
		u32 sysmem_mask, u32 vidmem_mask)
{
	switch (aperture) {
	case APERTURE_SYSMEM:
		/* sysmem for dgpus; some igpus consider system memory vidmem */
		return g->mm.vidmem_is_vidmem ? sysmem_mask : vidmem_mask;
	case APERTURE_VIDMEM:
		/* for dgpus only */
		return vidmem_mask;
	case APERTURE_INVALID:
		/*
		 * WARN_ON() expects a condition; the original passed a string
		 * literal, which is always true but never printed. Warn
		 * unconditionally instead.
		 */
		WARN_ON(1);
		break;
	}
	return 0;
}
/*
 * Convenience wrapper: pick the sysmem/vidmem target field value for a
 * mem_desc by forwarding its aperture to __gk20a_aperture_mask().
 */
u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem,
		u32 sysmem_mask, u32 vidmem_mask)
{
	return __gk20a_aperture_mask(g, mem->aperture, sysmem_mask, vidmem_mask);
}
int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem) int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem)
{ {
return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem); return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem);
@@ -4049,19 +4072,23 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
false, false, "cde"); false, false, "cde");
} }
void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr) void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block,
struct vm_gk20a *vm)
{ {
u64 pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
u32 pdb_addr_hi = u64_hi32(pdb_addr); u32 pdb_addr_hi = u64_hi32(pdb_addr);
gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(), gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
(g->mm.vidmem_is_vidmem ?
ram_in_page_dir_base_target_sys_mem_ncoh_f() : gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
gk20a_aperture_mask(g, &vm->pdb.mem,
ram_in_page_dir_base_target_sys_mem_ncoh_f(),
ram_in_page_dir_base_target_vid_mem_f()) | ram_in_page_dir_base_target_vid_mem_f()) |
ram_in_page_dir_base_vol_true_f() | ram_in_page_dir_base_vol_true_f() |
ram_in_page_dir_base_lo_f(pdb_addr_lo)); ram_in_page_dir_base_lo_f(pdb_addr_lo));
gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(), gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
ram_in_page_dir_base_hi_f(pdb_addr_hi)); ram_in_page_dir_base_hi_f(pdb_addr_hi));
} }
@@ -4069,14 +4096,11 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
u32 big_page_size) u32 big_page_size)
{ {
struct gk20a *g = gk20a_from_vm(vm); struct gk20a *g = gk20a_from_vm(vm);
u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); g->ops.mm.init_pdb(g, inst_block, vm);
g->ops.mm.init_pdb(g, inst_block, pde_addr);
gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(), gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(),
u64_lo32(vm->va_limit - 1) & ~0xfff); u64_lo32(vm->va_limit - 1) & ~0xfff);
@@ -4311,7 +4335,7 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
{ {
struct gk20a *g = gk20a_from_vm(vm); struct gk20a *g = gk20a_from_vm(vm);
u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(vm->mm->g, u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(g,
vm->pdb.mem.sgt->sgl, 0) >> 12); vm->pdb.mem.sgt->sgl, 0) >> 12);
u32 data; u32 data;
s32 retry = 2000; s32 retry = 2000;
@@ -4348,8 +4372,8 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
gk20a_writel(g, fb_mmu_invalidate_pdb_r(), gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
fb_mmu_invalidate_pdb_addr_f(addr_lo) | fb_mmu_invalidate_pdb_addr_f(addr_lo) |
(g->mm.vidmem_is_vidmem ? gk20a_aperture_mask(g, &vm->pdb.mem,
fb_mmu_invalidate_pdb_aperture_sys_mem_f() : fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
fb_mmu_invalidate_pdb_aperture_vid_mem_f())); fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
gk20a_writel(g, fb_mmu_invalidate_r(), gk20a_writel(g, fb_mmu_invalidate_r(),

View File

@@ -600,6 +600,11 @@ static inline phys_addr_t gk20a_mem_phys(struct mem_desc *mem)
return 0; return 0;
} }
u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture,
u32 sysmem_mask, u32 vidmem_mask);
u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem,
u32 sysmem_mask, u32 vidmem_mask);
u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
u64 map_offset, u64 map_offset,
struct sg_table *sgt, struct sg_table *sgt,
@@ -767,7 +772,8 @@ struct gpu_ops;
void gk20a_init_mm(struct gpu_ops *gops); void gk20a_init_mm(struct gpu_ops *gops);
const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
u32 big_page_size); u32 big_page_size);
void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr); void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
struct vm_gk20a *vm);
void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);

View File

@@ -35,8 +35,8 @@ static void channel_gm20b_bind(struct channel_gk20a *c)
gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid), gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
ccsr_channel_inst_ptr_f(inst_ptr) | ccsr_channel_inst_ptr_f(inst_ptr) |
(g->mm.vidmem_is_vidmem ? gk20a_aperture_mask(g, &c->inst_block,
ccsr_channel_inst_target_sys_mem_ncoh_f() : ccsr_channel_inst_target_sys_mem_ncoh_f(),
ccsr_channel_inst_target_vid_mem_f()) | ccsr_channel_inst_target_vid_mem_f()) |
ccsr_channel_inst_bind_true_f()); ccsr_channel_inst_bind_true_f());

View File

@@ -135,18 +135,20 @@ static bool gm20b_mm_support_sparse(struct gk20a *g)
return true; return true;
} }
static int gm20b_mm_bar1_bind(struct gk20a *g, u64 bar1_iova) static int gm20b_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst)
{ {
int retry = 1000; int retry = 1000;
u64 inst_pa = (u32)(bar1_iova >> bar1_instance_block_shift_gk20a()); u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a());
gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
gk20a_writel(g, bus_bar1_block_r(), gk20a_writel(g, bus_bar1_block_r(),
(g->mm.vidmem_is_vidmem ? gk20a_aperture_mask(g, bar1_inst,
bus_bar1_block_target_sys_mem_ncoh_f() : bus_bar1_block_target_sys_mem_ncoh_f(),
bus_bar1_block_target_vid_mem_f()) | bus_bar1_block_target_vid_mem_f()) |
bus_bar1_block_mode_virtual_f() | bus_bar1_block_mode_virtual_f() |
bus_bar1_block_ptr_f(inst_pa)); bus_bar1_block_ptr_f(ptr_v));
do { do {
u32 val = gk20a_readl(g, bus_bind_status_r()); u32 val = gk20a_readl(g, bus_bind_status_r());
u32 pending = bus_bind_status_bar1_pending_v(val); u32 pending = bus_bind_status_bar1_pending_v(val);