diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index ce4f67b05..e689aa7f0 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -65,6 +65,7 @@ nvgpu-y := \ common/mm/vm_area.o \ common/mm/nvgpu_mem.o \ common/mm/comptags.o \ + common/mm/mm.o \ common/bus.o \ common/enabled.o \ common/pramin.o \ diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c index 56edc11b5..c8831a975 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c @@ -1372,7 +1372,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) err = g->ops.dbg_session_ops.perfbuffer_disable(g); nvgpu_vm_unmap_buffer(vm, offset, NULL); - gk20a_free_inst_block(g, &mm->perfbuf.inst_block); + nvgpu_free_inst_block(g, &mm->perfbuf.inst_block); nvgpu_vm_put(vm); g->perfbuf.owner = NULL; diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c index c6f10a692..a2546e9d4 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c @@ -25,8 +25,8 @@ #include #include #include - -#include "gk20a/mm_gk20a.h" +#include +#include #include "buddy_allocator_priv.h" diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c new file mode 100644 index 000000000..1027ed283 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -0,0 +1,426 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" + +/* + * Attempt to find a reserved memory area to determine PTE size for the passed + * mapping. If no reserved area can be found use small pages. + */ +enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, + u64 base, u64 size) +{ + struct nvgpu_vm_area *vm_area; + + vm_area = nvgpu_vm_area_find(vm, base); + if (!vm_area) + return gmmu_page_size_small; + + return vm_area->pgsz_idx; +} + +/* + * This is for when the address space does not support unified address spaces. + */ +static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm, + u64 base, u64 size) +{ + if (!base) { + if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) + return gmmu_page_size_big; + return gmmu_page_size_small; + } else { + if (base < __nv_gmmu_va_small_page_limit()) + return gmmu_page_size_small; + else + return gmmu_page_size_big; + } +} + +/* + * This determines the PTE size for a given alloc. Used by both the GVA space + * allocator and the mm core code so that agreement can be reached on how to + * map allocations. + * + * The page size of a buffer is this: + * + * o If the VM doesn't support large pages then obviously small pages + * must be used. 
+ * o If the base address is non-zero (fixed address map): + * - Attempt to find a reserved memory area and use the page size + * based on that. + * - If no reserved page size is available, default to small pages. + * o If the base is zero: + * - If the size is larger than or equal to the big page size, use big + * pages. + * - Otherwise use small pages. + */ +enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size) +{ + struct gk20a *g = gk20a_from_vm(vm); + + if (!vm->big_pages) + return gmmu_page_size_small; + + if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES)) + return __get_pte_size_split_addr(vm, base, size); + + if (base) + return __get_pte_size_fixed_map(vm, base, size); + + if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) + return gmmu_page_size_big; + return gmmu_page_size_small; +} + +int nvgpu_mm_suspend(struct gk20a *g) +{ + nvgpu_info(g, "MM suspend running..."); + + nvgpu_vidmem_thread_pause_sync(&g->mm); + + g->ops.mm.cbc_clean(g); + g->ops.mm.l2_flush(g, false); + + nvgpu_info(g, "MM suspend done!"); + + return 0; +} + +u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + if (g->mm.has_physical_mode) + return nvgpu_mem_get_phys_addr(g, inst_block); + else + return nvgpu_mem_get_addr(g, inst_block); +} + +void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + if (nvgpu_mem_is_valid(inst_block)) + nvgpu_dma_free(g, inst_block); +} + +static int nvgpu_alloc_sysmem_flush(struct gk20a *g) +{ + return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush); +} + +static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + + if (mm->vidmem.ce_ctx_id != (u32)~0) + gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id); + + mm->vidmem.ce_ctx_id = (u32)~0; + + nvgpu_vm_put(mm->ce.vm); +} + +static void nvgpu_remove_mm_support(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + + if (g->ops.mm.fault_info_mem_destroy) + 
g->ops.mm.fault_info_mem_destroy(g); + + if (g->ops.mm.remove_bar2_vm) + g->ops.mm.remove_bar2_vm(g); + + if (g->ops.mm.is_bar1_supported(g)) { + nvgpu_free_inst_block(g, &mm->bar1.inst_block); + nvgpu_vm_put(mm->bar1.vm); + } + + nvgpu_free_inst_block(g, &mm->pmu.inst_block); + nvgpu_free_inst_block(g, &mm->hwpm.inst_block); + nvgpu_vm_put(mm->pmu.vm); + nvgpu_vm_put(mm->cde.vm); + + nvgpu_semaphore_sea_destroy(g); + nvgpu_vidmem_destroy(g); + nvgpu_pd_cache_fini(g); +} + +/* pmu vm, share channel_vm interfaces */ +static int nvgpu_init_system_vm(struct mm_gk20a *mm) +{ + int err; + struct gk20a *g = gk20a_from_mm(mm); + struct nvgpu_mem *inst_block = &mm->pmu.inst_block; + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + u32 low_hole, aperture_size; + + /* + * No user region - so we will pass that as zero sized. + */ + low_hole = SZ_4K * 16; + aperture_size = GK20A_PMU_VA_SIZE * 2; + + mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; + nvgpu_info(g, "pmu vm size = 0x%x", mm->pmu.aperture_size); + + mm->pmu.vm = nvgpu_vm_init(g, big_page_size, + low_hole, + aperture_size - low_hole, + aperture_size, + true, + false, + "system"); + if (!mm->pmu.vm) + return -ENOMEM; + + err = g->ops.mm.alloc_inst_block(g, inst_block); + if (err) + goto clean_up_vm; + g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size); + + return 0; + +clean_up_vm: + nvgpu_vm_put(mm->pmu.vm); + return err; +} + +static int nvgpu_init_hwpm(struct mm_gk20a *mm) +{ + int err; + struct gk20a *g = gk20a_from_mm(mm); + struct nvgpu_mem *inst_block = &mm->hwpm.inst_block; + + err = g->ops.mm.alloc_inst_block(g, inst_block); + if (err) + return err; + g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0); + + return 0; +} + +static int nvgpu_init_cde_vm(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + + mm->cde.vm = nvgpu_vm_init(g, big_page_size, + big_page_size << 10, + NV_MM_DEFAULT_KERNEL_SIZE, + 
NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, + false, false, "cde"); + if (!mm->cde.vm) + return -ENOMEM; + return 0; +} + +static int nvgpu_init_ce_vm(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + + mm->ce.vm = nvgpu_vm_init(g, big_page_size, + big_page_size << 10, + NV_MM_DEFAULT_KERNEL_SIZE, + NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, + false, false, "ce"); + if (!mm->ce.vm) + return -ENOMEM; + return 0; +} + +void nvgpu_init_mm_ce_context(struct gk20a *g) +{ +#if defined(CONFIG_GK20A_VIDMEM) + if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) { + g->mm.vidmem.ce_ctx_id = + gk20a_ce_create_context_with_cb(g, + gk20a_fifo_get_fast_ce_runlist_id(g), + -1, + -1, + -1, + NULL); + + if (g->mm.vidmem.ce_ctx_id == (u32)~0) + nvgpu_err(g, + "Failed to allocate CE context for vidmem page clearing support"); + } +#endif +} + +static int nvgpu_init_mm_reset_enable_hw(struct gk20a *g) +{ + if (g->ops.fb.reset) + g->ops.fb.reset(g); + + if (g->ops.clock_gating.slcg_fb_load_gating_prod) + g->ops.clock_gating.slcg_fb_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_ltc_load_gating_prod) + g->ops.clock_gating.slcg_ltc_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.blcg_fb_load_gating_prod) + g->ops.clock_gating.blcg_fb_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_ltc_load_gating_prod) + g->ops.clock_gating.blcg_ltc_load_gating_prod(g, + g->blcg_enabled); + + if (g->ops.fb.init_fs_state) + g->ops.fb.init_fs_state(g); + + return 0; +} + +static int nvgpu_init_bar1_vm(struct mm_gk20a *mm) +{ + int err; + struct gk20a *g = gk20a_from_mm(mm); + struct nvgpu_mem *inst_block = &mm->bar1.inst_block; + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + + mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; + nvgpu_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size); + mm->bar1.vm = 
nvgpu_vm_init(g, + big_page_size, + SZ_4K, + mm->bar1.aperture_size - SZ_4K, + mm->bar1.aperture_size, + true, false, + "bar1"); + if (!mm->bar1.vm) + return -ENOMEM; + + err = g->ops.mm.alloc_inst_block(g, inst_block); + if (err) + goto clean_up_vm; + g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size); + + return 0; + +clean_up_vm: + nvgpu_vm_put(mm->bar1.vm); + return err; +} + +static int nvgpu_init_mm_setup_sw(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + int err; + + if (mm->sw_ready) { + nvgpu_info(g, "skip init"); + return 0; + } + + mm->g = g; + nvgpu_mutex_init(&mm->l2_op_lock); + + /*TBD: make channel vm size configurable */ + mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE - + NV_MM_DEFAULT_KERNEL_SIZE; + mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; + + nvgpu_info(g, "channel vm size: user %dMB kernel %dMB", + (int)(mm->channel.user_size >> 20), + (int)(mm->channel.kernel_size >> 20)); + + nvgpu_init_pramin(mm); + + mm->vidmem.ce_ctx_id = (u32)~0; + + err = nvgpu_vidmem_init(mm); + if (err) + return err; + + /* + * this requires fixed allocations in vidmem which must be + * allocated before all other buffers + */ + if (g->ops.pmu.alloc_blob_space + && !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { + err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob); + if (err) + return err; + } + + err = nvgpu_alloc_sysmem_flush(g); + if (err) + return err; + + if (g->ops.mm.is_bar1_supported(g)) { + err = nvgpu_init_bar1_vm(mm); + if (err) + return err; + } + if (g->ops.mm.init_bar2_vm) { + err = g->ops.mm.init_bar2_vm(g); + if (err) + return err; + } + err = nvgpu_init_system_vm(mm); + if (err) + return err; + + err = nvgpu_init_hwpm(mm); + if (err) + return err; + + err = nvgpu_init_cde_vm(mm); + if (err) + return err; + + err = nvgpu_init_ce_vm(mm); + if (err) + return err; + + mm->remove_support = nvgpu_remove_mm_support; + mm->remove_ce_support = nvgpu_remove_mm_ce_support; + + mm->sw_ready = true; + + return 0; +} + +int 
nvgpu_init_mm_support(struct gk20a *g) +{ + int err; /* signed: the init helpers return int status codes that are passed straight back to the caller */ + + err = nvgpu_init_mm_reset_enable_hw(g); + if (err) + return err; + + err = nvgpu_init_mm_setup_sw(g); + if (err) + return err; + + if (g->ops.mm.init_mm_setup_hw) + err = g->ops.mm.init_mm_setup_hw(g); + + return err; +} diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c index 938c4b007..9b031bbf4 100644 --- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "gk20a.h" #include "bus_gk20a.h" @@ -137,8 +138,8 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value) int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) { - u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); - u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a()); + u64 iova = nvgpu_inst_block_addr(g, bar1_inst); + u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v()); gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 725ae2782..e3896981e 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "gk20a.h" #include "css_gr_gk20a.h" @@ -183,7 +184,7 @@ int css_hw_enable_snapshot(struct channel_gk20a *ch, gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size); /* this field is aligned to 4K */ - inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; + inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; /* A write to MEM_BLOCK triggers the block bind operation.
MEM_BLOCK * should be written last */ diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 8c39ecb76..802ccd766 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "gk20a.h" #include "gk20a/platform_gk20a.h" @@ -305,7 +306,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) return err; } - err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block); + err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block); if (err) return err; @@ -322,8 +323,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) gk20a_writel(g, perf_pmasys_outsize_r(), size); /* this field is aligned to 4K */ - inst_pa_page = gk20a_mm_inst_block_addr(g, - &mm->perfbuf.inst_block) >> 12; + inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; /* A write to MEM_BLOCK triggers the block bind operation. 
MEM_BLOCK * should be written last */ diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 7fd1793ca..12d7dcb9b 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "ctxsw_trace_gk20a.h" #include "fecs_trace_gk20a.h" @@ -93,7 +94,7 @@ static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts) static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch) { - return (u32) (gk20a_mm_inst_block_addr(g, &ch->inst_block) >> 12LL); + return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL); } static inline int gk20a_fecs_trace_num_ts(void) @@ -633,12 +634,12 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "chid=%d context_ptr=%x inst_block=%llx", ch->chid, context_ptr, - gk20a_mm_inst_block_addr(g, &ch->inst_block)); + nvgpu_inst_block_addr(g, &ch->inst_block)); if (!trace) return -ENOMEM; - pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf); + pa = nvgpu_inst_block_addr(g, &trace->trace_buf); if (!pa) return -ENOMEM; aperture = nvgpu_aperture_mask(g, &trace->trace_buf, diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 03ca69846..fc71c358e 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -24,6 +24,7 @@ #include +#include #include #include #include @@ -1058,7 +1059,7 @@ gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr) if (!ch) continue; - ch_inst_ptr = gk20a_mm_inst_block_addr(g, &ch->inst_block); + ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block); if (inst_ptr == ch_inst_ptr) return ch; @@ -1659,10 +1660,10 @@ static bool gk20a_fifo_handle_mmu_fault( ch->chid); } } else if (mmfault_info.inst_ptr == - gk20a_mm_inst_block_addr(g, &g->mm.bar1.inst_block)) { + nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) { 
nvgpu_err(g, "mmu fault from bar1"); } else if (mmfault_info.inst_ptr == - gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)) { + nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) { nvgpu_err(g, "mmu fault from pmu"); } else nvgpu_err(g, "couldn't locate channel for mmu fault"); @@ -3973,12 +3974,12 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) gk20a_dbg_fn(""); - err = gk20a_alloc_inst_block(g, &ch->inst_block); + err = g->ops.mm.alloc_inst_block(g, &ch->inst_block); if (err) return err; gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", - ch->chid, gk20a_mm_inst_block_addr(g, &ch->inst_block)); + ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block)); gk20a_dbg_fn("done"); return 0; @@ -3986,7 +3987,7 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch) { - gk20a_free_inst_block(g, &ch->inst_block); + nvgpu_free_inst_block(g, &ch->inst_block); } u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c) diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 2bc7d9a8b..ea5d55a4a 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -36,6 +36,7 @@ #include struct gk20a_debug_output; +struct mmu_fault_info; #define MAX_RUNLIST_BUFFERS 2 diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 2d09c0bbf..e3c2397c8 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -107,7 +108,7 @@ int gk20a_prepare_poweroff(struct gk20a *g) ret |= nvgpu_pmu_destroy(g); ret |= gk20a_gr_suspend(g); - ret |= gk20a_mm_suspend(g); + ret |= nvgpu_mm_suspend(g); ret |= gk20a_fifo_suspend(g); gk20a_ce_suspend(g); @@ -213,7 +214,7 @@ int gk20a_finalize_poweron(struct gk20a *g) goto done; } - err = gk20a_init_mm_support(g); + err = 
nvgpu_init_mm_support(g); if (err) { nvgpu_err(g, "failed to init gk20a mm"); goto done; @@ -314,7 +315,7 @@ int gk20a_finalize_poweron(struct gk20a *g) gk20a_init_ce_support(g); - gk20a_init_mm_ce_context(g); + nvgpu_init_mm_ce_context(g); if (g->ops.xve.available_speeds) { u32 speed; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 92bcb6182..9c09e85ff 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -47,6 +47,7 @@ struct nvgpu_warpstate; #include #include +#include #include #include #include @@ -756,6 +757,8 @@ struct gpu_ops { u64 (*gpu_phys_addr)(struct gk20a *g, struct nvgpu_gmmu_attrs *attrs, u64 phys); size_t (*get_vidmem_size)(struct gk20a *g); + int (*alloc_inst_block)(struct gk20a *g, + struct nvgpu_mem *inst_block); void (*init_inst_block)(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); bool (*mmu_fault_pending)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index d67324537..6d3702508 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "gk20a.h" #include "kind_gk20a.h" @@ -731,7 +732,7 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g, static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) { - u32 ptr = u64_lo32(gk20a_mm_inst_block_addr(g, inst_block) + u32 ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block) >> ram_in_base_shift_v()); u32 aperture = nvgpu_aperture_mask(g, inst_block, gr_fecs_current_ctx_target_sys_mem_ncoh_f(), @@ -744,7 +745,7 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, struct channel_gk20a *c) { - u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) + u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v()); 
u32 data = fecs_current_ctx_data(g, &c->inst_block); u32 ret; @@ -1980,7 +1981,7 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; int err; - err = gk20a_alloc_inst_block(g, &ucode_info->inst_blk_desc); + err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc); if (err) return err; @@ -2154,7 +2155,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); - inst_ptr = gk20a_mm_inst_block_addr(g, &ucode_info->inst_blk_desc); + inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); gk20a_writel(g, gr_fecs_new_ctx_r(), gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, @@ -5455,7 +5456,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( if (!gk20a_channel_get(ch)) continue; - if ((u32)(gk20a_mm_inst_block_addr(g, &ch->inst_block) >> + if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >> ram_in_base_shift_v()) == gr_fecs_current_ctx_ptr_v(curr_ctx)) { tsgid = ch->tsgid; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index d96fa4e1f..a17d6bb68 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -1,6 +1,4 @@ /* - * GK20A memory management - * * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -24,6 +22,7 @@ #include +#include #include #include #include @@ -88,161 +87,6 @@ * */ -static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); -static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); -static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); -static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm); -static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm); - -static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) -{ - gk20a_dbg_fn(""); - if (g->ops.fb.reset) - g->ops.fb.reset(g); - - if (g->ops.clock_gating.slcg_fb_load_gating_prod) - g->ops.clock_gating.slcg_fb_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_ltc_load_gating_prod) - g->ops.clock_gating.slcg_ltc_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.blcg_fb_load_gating_prod) - g->ops.clock_gating.blcg_fb_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_ltc_load_gating_prod) - g->ops.clock_gating.blcg_ltc_load_gating_prod(g, - g->blcg_enabled); - - if (g->ops.fb.init_fs_state) - g->ops.fb.init_fs_state(g); - - return 0; -} - -static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm) -{ - struct gk20a *g = gk20a_from_mm(mm); - - if (mm->vidmem.ce_ctx_id != (u32)~0) - gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id); - - mm->vidmem.ce_ctx_id = (u32)~0; - - nvgpu_vm_put(mm->ce.vm); -} - -static void gk20a_remove_mm_support(struct mm_gk20a *mm) -{ - struct gk20a *g = gk20a_from_mm(mm); - - if (g->ops.mm.fault_info_mem_destroy) - g->ops.mm.fault_info_mem_destroy(g); - - if (g->ops.mm.remove_bar2_vm) - g->ops.mm.remove_bar2_vm(g); - - if (g->ops.mm.is_bar1_supported(g)) { - gk20a_free_inst_block(g, &mm->bar1.inst_block); - nvgpu_vm_put(mm->bar1.vm); - } - - gk20a_free_inst_block(g, &mm->pmu.inst_block); - gk20a_free_inst_block(g, &mm->hwpm.inst_block); - nvgpu_vm_put(mm->pmu.vm); - nvgpu_vm_put(mm->cde.vm); - - 
nvgpu_semaphore_sea_destroy(g); - nvgpu_vidmem_destroy(g); - nvgpu_pd_cache_fini(g); -} - -static int gk20a_alloc_sysmem_flush(struct gk20a *g) -{ - return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush); -} - -int gk20a_init_mm_setup_sw(struct gk20a *g) -{ - struct mm_gk20a *mm = &g->mm; - int err; - - gk20a_dbg_fn(""); - - if (mm->sw_ready) { - gk20a_dbg_fn("skip init"); - return 0; - } - - mm->g = g; - nvgpu_mutex_init(&mm->l2_op_lock); - - /*TBD: make channel vm size configurable */ - mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE - - NV_MM_DEFAULT_KERNEL_SIZE; - mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; - - gk20a_dbg_info("channel vm size: user %dMB kernel %dMB", - (int)(mm->channel.user_size >> 20), - (int)(mm->channel.kernel_size >> 20)); - - nvgpu_init_pramin(mm); - - mm->vidmem.ce_ctx_id = (u32)~0; - - err = nvgpu_vidmem_init(mm); - if (err) - return err; - - /* - * this requires fixed allocations in vidmem which must be - * allocated before all other buffers - */ - if (g->ops.pmu.alloc_blob_space - && !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { - err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob); - if (err) - return err; - } - - err = gk20a_alloc_sysmem_flush(g); - if (err) - return err; - - if (g->ops.mm.is_bar1_supported(g)) { - err = gk20a_init_bar1_vm(mm); - if (err) - return err; - } - if (g->ops.mm.init_bar2_vm) { - err = g->ops.mm.init_bar2_vm(g); - if (err) - return err; - } - err = gk20a_init_system_vm(mm); - if (err) - return err; - - err = gk20a_init_hwpm(mm); - if (err) - return err; - - err = gk20a_init_cde_vm(mm); - if (err) - return err; - - err = gk20a_init_ce_vm(mm); - if (err) - return err; - - mm->remove_support = gk20a_remove_mm_support; - mm->remove_ce_support = gk20a_remove_mm_ce_support; - - mm->sw_ready = true; - - gk20a_dbg_fn("done"); - return 0; -} - /* make sure gk20a_init_mm_support is called before */ int gk20a_init_mm_setup_hw(struct gk20a *g) { @@ -274,43 +118,6 @@ int 
gk20a_init_mm_setup_hw(struct gk20a *g) return 0; } -int gk20a_init_mm_support(struct gk20a *g) -{ - u32 err; - - err = gk20a_init_mm_reset_enable_hw(g); - if (err) - return err; - - err = gk20a_init_mm_setup_sw(g); - if (err) - return err; - - if (g->ops.mm.init_mm_setup_hw) - err = g->ops.mm.init_mm_setup_hw(g); - - return err; -} - -void gk20a_init_mm_ce_context(struct gk20a *g) -{ -#if defined(CONFIG_GK20A_VIDMEM) - if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) { - g->mm.vidmem.ce_ctx_id = - gk20a_ce_create_context_with_cb(g, - gk20a_fifo_get_fast_ce_runlist_id(g), - -1, - -1, - -1, - NULL); - - if (g->mm.vidmem.ce_ctx_id == (u32)~0) - nvgpu_err(g, - "Failed to allocate CE context for vidmem page clearing support"); - } -#endif -} - int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm) { return vm->mmu_levels[0].lo_bit[0]; @@ -505,76 +312,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = { {.update_entry = NULL} }; -/* - * Attempt to find a reserved memory area to determine PTE size for the passed - * mapping. If no reserved area can be found use small pages. - */ -enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, - u64 base, u64 size) -{ - struct nvgpu_vm_area *vm_area; - - vm_area = nvgpu_vm_area_find(vm, base); - if (!vm_area) - return gmmu_page_size_small; - - return vm_area->pgsz_idx; -} - -/* - * This is for when the address space does not support unified address spaces. - */ -static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm, - u64 base, u64 size) -{ - if (!base) { - if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) - return gmmu_page_size_big; - return gmmu_page_size_small; - } else { - if (base < __nv_gmmu_va_small_page_limit()) - return gmmu_page_size_small; - else - return gmmu_page_size_big; - } -} - -/* - * This determines the PTE size for a given alloc. Used by both the GVA space - * allocator and the mm core code so that agreement can be reached on how to - * map allocations. 
- * - * The page size of a buffer is this: - * - * o If the VM doesn't support large pages then obviously small pages - * must be used. - * o If the base address is non-zero (fixed address map): - * - Attempt to find a reserved memory area and use the page size - * based on that. - * - If no reserved page size is available, default to small pages. - * o If the base is zero: - * - If the size is larger than or equal to the big page size, use big - * pages. - * - Otherwise use small pages. - */ -enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size) -{ - struct gk20a *g = gk20a_from_vm(vm); - - if (!vm->big_pages) - return gmmu_page_size_small; - - if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES)) - return __get_pte_size_split_addr(vm, base, size); - - if (base) - return __get_pte_size_fixed_map(vm, base, size); - - if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) - return gmmu_page_size_big; - return gmmu_page_size_small; -} - int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch) { int err = 0; @@ -599,151 +336,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share, return __gk20a_vm_bind_channel(as_share->vm, ch); } -int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) -{ - int err; - - gk20a_dbg_fn(""); - - err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block); - if (err) { - nvgpu_err(g, "%s: memory allocation failed", __func__); - return err; - } - - gk20a_dbg_fn("done"); - return 0; -} - -void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) -{ - if (inst_block->size) - nvgpu_dma_free(g, inst_block); -} - -u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block) -{ - if (g->mm.has_physical_mode) - return nvgpu_mem_get_phys_addr(g, inst_block); - else - return nvgpu_mem_get_addr(g, inst_block); -} - -static int gk20a_init_bar1_vm(struct mm_gk20a *mm) -{ - int err; - struct gk20a *g = gk20a_from_mm(mm); - struct nvgpu_mem *inst_block = 
&mm->bar1.inst_block; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); - - mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; - gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); - mm->bar1.vm = nvgpu_vm_init(g, - big_page_size, - SZ_4K, - mm->bar1.aperture_size - SZ_4K, - mm->bar1.aperture_size, - true, false, - "bar1"); - if (!mm->bar1.vm) - return -ENOMEM; - - err = gk20a_alloc_inst_block(g, inst_block); - if (err) - goto clean_up_vm; - g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size); - - return 0; - -clean_up_vm: - nvgpu_vm_put(mm->bar1.vm); - return err; -} - -/* pmu vm, share channel_vm interfaces */ -static int gk20a_init_system_vm(struct mm_gk20a *mm) -{ - int err; - struct gk20a *g = gk20a_from_mm(mm); - struct nvgpu_mem *inst_block = &mm->pmu.inst_block; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); - u32 low_hole, aperture_size; - - /* - * No user region - so we will pass that as zero sized. - */ - low_hole = SZ_4K * 16; - aperture_size = GK20A_PMU_VA_SIZE * 2; - - mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; - gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); - - mm->pmu.vm = nvgpu_vm_init(g, big_page_size, - low_hole, - aperture_size - low_hole, - aperture_size, - true, - false, - "system"); - if (!mm->pmu.vm) - return -ENOMEM; - - err = gk20a_alloc_inst_block(g, inst_block); - if (err) - goto clean_up_vm; - g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size); - - return 0; - -clean_up_vm: - nvgpu_vm_put(mm->pmu.vm); - return err; -} - -static int gk20a_init_hwpm(struct mm_gk20a *mm) -{ - int err; - struct gk20a *g = gk20a_from_mm(mm); - struct nvgpu_mem *inst_block = &mm->hwpm.inst_block; - - err = gk20a_alloc_inst_block(g, inst_block); - if (err) - return err; - g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0); - - return 0; -} - -static int gk20a_init_cde_vm(struct mm_gk20a *mm) -{ - struct gk20a *g = gk20a_from_mm(mm); - u32 big_page_size = 
g->ops.mm.get_default_big_page_size(); - - mm->cde.vm = nvgpu_vm_init(g, big_page_size, - big_page_size << 10, - NV_MM_DEFAULT_KERNEL_SIZE, - NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, - false, false, "cde"); - if (!mm->cde.vm) - return -ENOMEM; - return 0; -} - -static int gk20a_init_ce_vm(struct mm_gk20a *mm) -{ - struct gk20a *g = gk20a_from_mm(mm); - u32 big_page_size = g->ops.mm.get_default_big_page_size(); - - mm->ce.vm = nvgpu_vm_init(g, big_page_size, - big_page_size << 10, - NV_MM_DEFAULT_KERNEL_SIZE, - NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, - false, false, "ce"); - if (!mm->ce.vm) - return -ENOMEM; - return 0; -} - void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, struct vm_gk20a *vm) { @@ -770,7 +362,7 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, struct gk20a *g = gk20a_from_vm(vm); gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", - gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); + nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va); g->ops.mm.init_pdb(g, inst_block, vm); @@ -784,6 +376,22 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, g->ops.mm.set_big_page_size(g, inst_block, big_page_size); } +int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + int err; + + gk20a_dbg_fn(""); + + err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block); + if (err) { + nvgpu_err(g, "%s: memory allocation failed", __func__); + return err; + } + + gk20a_dbg_fn("done"); + return 0; +} + int gk20a_mm_fb_flush(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; @@ -992,19 +600,6 @@ hw_was_off: gk20a_idle_nosuspend(g); } -int gk20a_mm_suspend(struct gk20a *g) -{ - gk20a_dbg_fn(""); - - nvgpu_vidmem_thread_pause_sync(&g->mm); - - g->ops.mm.cbc_clean(g); - g->ops.mm.l2_flush(g, false); - - gk20a_dbg_fn("done"); - return 0; -} - u32 gk20a_mm_get_iommu_bit(struct gk20a *g) { return 34; diff --git 
a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 15876b106..434fc4228 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -35,11 +35,6 @@ #include #include #include -#include -#include -#include - -struct nvgpu_pd_cache; #ifdef CONFIG_ARM64 #define outer_flush_range(a, b) @@ -138,219 +133,24 @@ struct priv_cmd_entry { struct gk20a; struct channel_gk20a; -int gk20a_init_mm_support(struct gk20a *g); -int gk20a_init_mm_setup_sw(struct gk20a *g); -int gk20a_init_mm_setup_hw(struct gk20a *g); -void gk20a_init_mm_ce_context(struct gk20a *g); - int gk20a_mm_fb_flush(struct gk20a *g); void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); void gk20a_mm_cbc_clean(struct gk20a *g); void gk20a_mm_l2_invalidate(struct gk20a *g); -#define FAULT_TYPE_NUM 2 /* replay and nonreplay faults */ - -struct mmu_fault_info { - u64 inst_ptr; - u32 inst_aperture; - u64 fault_addr; - u32 fault_addr_aperture; - u32 timestamp_lo; - u32 timestamp_hi; - u32 mmu_engine_id; - u32 gpc_id; - u32 client_type; - u32 client_id; - u32 fault_type; - u32 access_type; - u32 protected_mode; - u32 replayable_fault; - u32 replay_fault_en; - u32 valid; - u32 faulted_pbdma; - u32 faulted_engine; - u32 faulted_subid; - u32 chid; - struct channel_gk20a *refch; - const char *client_type_desc; - const char *fault_type_desc; - const char *client_id_desc; -}; - -struct mm_gk20a { - struct gk20a *g; - - /* GPU VA default sizes address spaces for channels */ - struct { - u64 user_size; /* userspace-visible GPU VA region */ - u64 kernel_size; /* kernel-only GPU VA region */ - } channel; - - struct { - u32 aperture_size; - struct vm_gk20a *vm; - struct nvgpu_mem inst_block; - } bar1; - - struct { - u32 aperture_size; - struct vm_gk20a *vm; - struct nvgpu_mem inst_block; - } bar2; - - struct { - u32 aperture_size; - struct vm_gk20a *vm; - struct nvgpu_mem inst_block; - } pmu; - - struct { - /* using pmu vm currently */ - struct nvgpu_mem 
inst_block; - } hwpm; - - struct { - struct vm_gk20a *vm; - struct nvgpu_mem inst_block; - } perfbuf; - - struct { - struct vm_gk20a *vm; - } cde; - - struct { - struct vm_gk20a *vm; - } ce; - - struct nvgpu_pd_cache *pd_cache; - - struct nvgpu_mutex l2_op_lock; - struct nvgpu_mutex tlb_lock; - struct nvgpu_mutex priv_lock; - - struct nvgpu_mem bar2_desc; - -#ifdef CONFIG_TEGRA_19x_GPU - struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM]; - unsigned int hw_fault_buf_status[FAULT_TYPE_NUM]; - struct mmu_fault_info *fault_info[FAULT_TYPE_NUM]; - struct nvgpu_mutex hub_isr_mutex; - u32 hub_intr_types; -#endif - /* - * Separate function to cleanup the CE since it requires a channel to - * be closed which must happen before fifo cleanup. - */ - void (*remove_ce_support)(struct mm_gk20a *mm); - void (*remove_support)(struct mm_gk20a *mm); - bool sw_ready; - int physical_bits; - bool use_full_comp_tag_line; - bool ltc_enabled_current; - bool ltc_enabled_target; - bool bypass_smmu; - bool disable_bigpage; - bool has_physical_mode; - - struct nvgpu_mem sysmem_flush; - - u32 pramin_window; - struct nvgpu_spinlock pramin_window_lock; - bool force_pramin; /* via debugfs */ - - struct { - size_t size; - u64 base; - size_t bootstrap_size; - u64 bootstrap_base; - - struct nvgpu_allocator allocator; - struct nvgpu_allocator bootstrap_allocator; - - u32 ce_ctx_id; - volatile bool cleared; - struct nvgpu_mutex first_clear_mutex; - - struct nvgpu_list_node clear_list_head; - struct nvgpu_mutex clear_list_mutex; - - struct nvgpu_cond clearing_thread_cond; - struct nvgpu_thread clearing_thread; - struct nvgpu_mutex clearing_thread_lock; - nvgpu_atomic_t pause_count; - - nvgpu_atomic64_t bytes_pending; - } vidmem; -}; - -int gk20a_mm_init(struct mm_gk20a *mm); - -#define gk20a_from_mm(mm) ((mm)->g) -#define gk20a_from_vm(vm) ((vm)->mm->g) - #define dev_from_vm(vm) dev_from_gk20a(vm->mm->g) -#define DEFAULT_ALLOC_ALIGNMENT (4*1024) - -static inline int bar1_aperture_size_mb_gk20a(void) -{ - 
return 16; /* 16MB is more than enough atm. */ -} - -/* The maximum GPU VA range supported */ -#define NV_GMMU_VA_RANGE 38 - -/* The default userspace-visible GPU VA size */ -#define NV_MM_DEFAULT_USER_SIZE (1ULL << 37) - -/* The default kernel-reserved GPU VA size */ -#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32) - -/* - * When not using unified address spaces, the bottom 56GB of the space are used - * for small pages, and the remaining high memory is used for large pages. - */ -static inline u64 __nv_gmmu_va_small_page_limit(void) -{ - return ((u64)SZ_1G * 56); -} - -enum nvgpu_flush_op { - NVGPU_FLUSH_DEFAULT, - NVGPU_FLUSH_FB, - NVGPU_FLUSH_L2_INV, - NVGPU_FLUSH_L2_FLUSH, - NVGPU_FLUSH_CBC_CLEAN, -}; - -enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, - u64 base, u64 size); -enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size); - -#if 0 /*related to addr bits above, concern below TBD on which is accurate */ -#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ - bus_bar1_block_ptr_s()) -#else -#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v() -#endif - -int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); -void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); -void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, - u32 big_page_size); -u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem); - -void gk20a_mm_dump_vm(struct vm_gk20a *vm, - u64 va_begin, u64 va_end, char *label); - -int gk20a_mm_suspend(struct gk20a *g); - void gk20a_mm_ltc_isr(struct gk20a *g); bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g); int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g); +int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); +void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, + u32 big_page_size); +int gk20a_init_mm_setup_hw(struct gk20a *g); + u64 
gk20a_locked_gmmu_map(struct vm_gk20a *vm, u64 map_offset, struct nvgpu_sgt *sgt, diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index e4dd6a598..2b954e1ad 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "gk20a.h" #include "gr_gk20a.h" @@ -181,7 +182,7 @@ int pmu_bootstrap(struct nvgpu_pmu *pmu) pwr_falcon_itfen_ctxen_enable_f()); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( - gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | + nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_target_sys_coh_f()); diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 7029b4772..557948e17 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -21,7 +21,6 @@ */ #include - #include #include #include @@ -33,6 +32,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/pmu_gk20a.h" @@ -1170,7 +1170,7 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu, pwr_falcon_itfen_ctxen_enable_f()); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( - gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | + nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_target_sys_coh_f()); diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c index b8d42f7af..34c8d4b73 100644 --- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c @@ -24,6 +24,7 @@ #include #include +#include #include "bus_gm20b.h" #include "gk20a/gk20a.h" @@ -35,8 +36,8 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) { struct nvgpu_timeout timeout; int err = 0; - u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); - u32 ptr_v = (u32)(iova >> 
bar1_instance_block_shift_gk20a()); + u64 iova = nvgpu_inst_block_addr(g, bar1_inst); + u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v()); gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index f4ddd92fa..0762e8bdd 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -42,7 +43,7 @@ void channel_gm20b_bind(struct channel_gk20a *c) { struct gk20a *g = c->g; - u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) + u32 inst_ptr = nvgpu_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v(); gk20a_dbg_info("bind channel %d inst ptr 0x%08x", diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 269fd7f14..d081fb245 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -442,6 +442,7 @@ static const struct gpu_ops gm20b_ops = { .init_pdb = gk20a_mm_init_pdb, .init_mm_setup_hw = gk20a_init_mm_setup_hw, .is_bar1_supported = gm20b_mm_is_bar1_supported, + .alloc_inst_block = gk20a_alloc_inst_block, .init_inst_block = gk20a_init_inst_block, .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, .get_kind_invalid = gm20b_get_kind_invalid, diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 1246ee7f1..59f72e130 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -524,6 +524,7 @@ static const struct gpu_ops gp106_ops = { .init_pdb = gp10b_mm_init_pdb, .init_mm_setup_hw = gp10b_init_mm_setup_hw, .is_bar1_supported = gm20b_mm_is_bar1_supported, + .alloc_inst_block = gk20a_alloc_inst_block, .init_inst_block = gk20a_init_inst_block, .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, .init_bar2_vm = gb10b_init_bar2_vm, diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c 
index 9f0fe375d..26ded39e6 100644 --- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c +++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c @@ -22,6 +22,7 @@ #include #include +#include #include "gk20a/gk20a.h" #include "sec2_gp106.h" @@ -88,7 +89,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu, gk20a_writel(g, psec_falcon_nxtctx_r(), pwr_pmu_new_instblk_ptr_f( - gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | + nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | nvgpu_aperture_mask(g, &mm->pmu.inst_block, pwr_pmu_new_instblk_target_sys_coh_f(), @@ -154,7 +155,7 @@ void init_pmu_setup_hw1(struct gk20a *g) pwr_falcon_itfen_ctxen_enable_f()); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( - gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | + nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | nvgpu_aperture_mask(g, &mm->pmu.inst_block, pwr_pmu_new_instblk_target_sys_coh_f(), diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index b80722b87..a10df7406 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -484,6 +484,7 @@ static const struct gpu_ops gp10b_ops = { .init_pdb = gp10b_mm_init_pdb, .init_mm_setup_hw = gp10b_init_mm_setup_hw, .is_bar1_supported = gm20b_mm_is_bar1_supported, + .alloc_inst_block = gk20a_alloc_inst_block, .init_inst_block = gk20a_init_inst_block, .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, .init_bar2_vm = gb10b_init_bar2_vm, diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 06a9b9291..dc7461539 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -22,6 +22,7 @@ * DEALINGS IN THE SOFTWARE. 
*/ +#include #include #include @@ -95,7 +96,7 @@ int gb10b_init_bar2_vm(struct gk20a *g) return -ENOMEM; /* allocate instance mem for bar2 */ - err = gk20a_alloc_inst_block(g, inst_block); + err = g->ops.mm.alloc_inst_block(g, inst_block); if (err) goto clean_up_va; @@ -112,7 +113,7 @@ int gb10b_init_bar2_mm_hw_setup(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; struct nvgpu_mem *inst_block = &mm->bar2.inst_block; - u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block); + u64 inst_pa = nvgpu_inst_block_addr(g, inst_block); gk20a_dbg_fn(""); @@ -374,6 +375,6 @@ void gp10b_remove_bar2_vm(struct gk20a *g) struct mm_gk20a *mm = &g->mm; gp10b_replayable_pagefault_buffer_deinit(g); - gk20a_free_inst_block(g, &mm->bar2.inst_block); + nvgpu_free_inst_block(g, &mm->bar2.inst_block); nvgpu_vm_put(mm->bar2.vm); } diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h new file mode 100644 index 000000000..13b33d9f5 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h @@ -0,0 +1,220 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVGPU_MM_H__ +#define __NVGPU_MM_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct gk20a; +struct vm_gk20a; +struct nvgpu_mem; +struct nvgpu_pd_cache; + +#define FAULT_TYPE_NUM 2 /* replay and nonreplay faults */ + +struct mmu_fault_info { + u64 inst_ptr; + u32 inst_aperture; + u64 fault_addr; + u32 fault_addr_aperture; + u32 timestamp_lo; + u32 timestamp_hi; + u32 mmu_engine_id; + u32 gpc_id; + u32 client_type; + u32 client_id; + u32 fault_type; + u32 access_type; + u32 protected_mode; + u32 replayable_fault; + u32 replay_fault_en; + u32 valid; + u32 faulted_pbdma; + u32 faulted_engine; + u32 faulted_subid; + u32 chid; + struct channel_gk20a *refch; + const char *client_type_desc; + const char *fault_type_desc; + const char *client_id_desc; +}; + +enum nvgpu_flush_op { + NVGPU_FLUSH_DEFAULT, + NVGPU_FLUSH_FB, + NVGPU_FLUSH_L2_INV, + NVGPU_FLUSH_L2_FLUSH, + NVGPU_FLUSH_CBC_CLEAN, +}; + +struct mm_gk20a { + struct gk20a *g; + + /* GPU VA default sizes address spaces for channels */ + struct { + u64 user_size; /* userspace-visible GPU VA region */ + u64 kernel_size; /* kernel-only GPU VA region */ + } channel; + + struct { + u32 aperture_size; + struct vm_gk20a *vm; + struct nvgpu_mem inst_block; + } bar1; + + struct { + u32 aperture_size; + struct vm_gk20a *vm; + struct nvgpu_mem inst_block; + } bar2; + + struct { + u32 aperture_size; + struct vm_gk20a *vm; + struct nvgpu_mem inst_block; + } pmu; + + struct { + /* using pmu vm currently */ + struct nvgpu_mem inst_block; + } hwpm; + + struct { + struct vm_gk20a *vm; + struct nvgpu_mem inst_block; + } perfbuf; + + struct { + struct vm_gk20a *vm; + } cde; + + struct { + struct vm_gk20a 
*vm; + } ce; + + struct nvgpu_pd_cache *pd_cache; + + struct nvgpu_mutex l2_op_lock; + struct nvgpu_mutex tlb_lock; + struct nvgpu_mutex priv_lock; + + struct nvgpu_mem bar2_desc; + +#ifdef CONFIG_TEGRA_19x_GPU + struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM]; + unsigned int hw_fault_buf_status[FAULT_TYPE_NUM]; + struct mmu_fault_info *fault_info[FAULT_TYPE_NUM]; + struct nvgpu_mutex hub_isr_mutex; + u32 hub_intr_types; +#endif + /* + * Separate function to cleanup the CE since it requires a channel to + * be closed which must happen before fifo cleanup. + */ + void (*remove_ce_support)(struct mm_gk20a *mm); + void (*remove_support)(struct mm_gk20a *mm); + bool sw_ready; + int physical_bits; + bool use_full_comp_tag_line; + bool ltc_enabled_current; + bool ltc_enabled_target; + bool bypass_smmu; + bool disable_bigpage; + bool has_physical_mode; + + struct nvgpu_mem sysmem_flush; + + u32 pramin_window; + struct nvgpu_spinlock pramin_window_lock; + bool force_pramin; /* via debugfs */ + + struct { + size_t size; + u64 base; + size_t bootstrap_size; + u64 bootstrap_base; + + struct nvgpu_allocator allocator; + struct nvgpu_allocator bootstrap_allocator; + + u32 ce_ctx_id; + volatile bool cleared; + struct nvgpu_mutex first_clear_mutex; + + struct nvgpu_list_node clear_list_head; + struct nvgpu_mutex clear_list_mutex; + + struct nvgpu_cond clearing_thread_cond; + struct nvgpu_thread clearing_thread; + struct nvgpu_mutex clearing_thread_lock; + nvgpu_atomic_t pause_count; + + nvgpu_atomic64_t bytes_pending; + } vidmem; +}; + +#define gk20a_from_mm(mm) ((mm)->g) +#define gk20a_from_vm(vm) ((vm)->mm->g) + +static inline int bar1_aperture_size_mb_gk20a(void) +{ + return 16; /* 16MB is more than enough atm. 
*/ +} + +/* The maximum GPU VA range supported */ +#define NV_GMMU_VA_RANGE 38 + +/* The default userspace-visible GPU VA size */ +#define NV_MM_DEFAULT_USER_SIZE (1ULL << 37) + +/* The default kernel-reserved GPU VA size */ +#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32) + +/* + * When not using unified address spaces, the bottom 56GB of the space are used + * for small pages, and the remaining high memory is used for large pages. + */ +static inline u64 __nv_gmmu_va_small_page_limit(void) +{ + return ((u64)SZ_1G * 56); +} + +enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, + u64 base, u64 size); +enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size); + +void nvgpu_init_mm_ce_context(struct gk20a *g); +int nvgpu_init_mm_support(struct gk20a *g); +int nvgpu_init_mm_setup_hw(struct gk20a *g); + +u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem); +void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); + +int nvgpu_mm_suspend(struct gk20a *g); + +#endif