From 4ef493979791fd4df96d8f07d84f6cd7b8c0915f Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Wed, 20 Mar 2019 16:55:08 -0700 Subject: [PATCH] gpu: nvgpu: add base_shift and alloc_size ramin HALs Added the following HALs - ramin.base_shift - ramin.alloc_size Use above HALs in mm, instead of using hw definitions. Defined nvgpu_inst_block_ptr to - get inst_block address, - shift it by base_shift - assert upper 32 bits are 0 - return lower 32 bits Added missing #include for Jira NVGPU-3015 Change-Id: I558a6f4c9fbc6873a5b71f1557ea9ad8eae2778f Signed-off-by: Thomas Fleury Reviewed-on: https://git-master.nvidia.com/r/2077840 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/channel_gm20b.c | 5 ++--- drivers/gpu/nvgpu/common/gr/fecs_trace.c | 11 +++-------- drivers/gpu/nvgpu/common/gr/obj_ctx.c | 1 + drivers/gpu/nvgpu/common/mm/mm.c | 9 +++++++++ drivers/gpu/nvgpu/common/perf/perf_gm20b.c | 7 +++---- drivers/gpu/nvgpu/common/perf/perf_gv11b.c | 7 +++---- drivers/gpu/nvgpu/common/pmu/pmu_gk20a.c | 7 +++---- drivers/gpu/nvgpu/common/pmu/pmu_gm20b.c | 8 ++++---- drivers/gpu/nvgpu/common/pmu/pmu_gp106.c | 8 ++++---- drivers/gpu/nvgpu/common/pmu/pmu_gv11b.c | 8 ++++---- drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c | 2 -- .../gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c | 2 ++ .../gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c | 3 +++ drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 1 - drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 7 ++++--- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 4 +--- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 3 +-- drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 2 ++ drivers/gpu/nvgpu/gp106/sec2_gp106.c | 7 +++---- drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 ++ drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 1 - drivers/gpu/nvgpu/gv100/hal_gv100.c | 3 +++ drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 1 - drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 3 +++ drivers/gpu/nvgpu/hal/fifo/ramin_gk20a.c | 10 ++++++++++ drivers/gpu/nvgpu/hal/fifo/ramin_gk20a.h | 2 ++ 
drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c | 8 ++++---- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 2 ++ drivers/gpu/nvgpu/include/nvgpu/mm.h | 1 + drivers/gpu/nvgpu/libnvgpu-drv.export | 1 + drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 1 + drivers/gpu/nvgpu/tu104/hal_tu104.c | 3 +++ drivers/gpu/nvgpu/tu104/sec2_tu104.c | 13 ++++++------- userspace/units/mm/gmmu/page_table/page_table.c | 2 ++ .../units/mm/page_table_faults/page_table_faults.c | 2 ++ 35 files changed, 94 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/channel_gm20b.c b/drivers/gpu/nvgpu/common/fifo/channel_gm20b.c index c545693a6..d134c6414 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel_gm20b.c +++ b/drivers/gpu/nvgpu/common/fifo/channel_gm20b.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "channel_gm20b.h" @@ -36,13 +37,11 @@ void gm20b_channel_bind(struct channel_gk20a *c) { struct gk20a *g = c->g; - u32 inst_ptr = nvgpu_inst_block_addr(g, &c->inst_block) - >> ram_in_base_shift_v(); + u32 inst_ptr = nvgpu_inst_block_ptr(g, &c->inst_block); nvgpu_log_info(g, "bind channel %d inst ptr 0x%08x", c->chid, inst_ptr); - gk20a_writel(g, ccsr_channel_inst_r(c->chid), ccsr_channel_inst_ptr_f(inst_ptr) | nvgpu_aperture_mask(g, &c->inst_block, diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace.c index b22ebae84..28501dacc 100644 --- a/drivers/gpu/nvgpu/common/gr/fecs_trace.c +++ b/drivers/gpu/nvgpu/common/gr/fecs_trace.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -546,12 +547,6 @@ int nvgpu_gr_fecs_trace_reset(struct gk20a *g) return g->ops.gr.fecs_trace.set_read_index(g, 0); } -static u32 nvgpu_gr_fecs_trace_fecs_context_ptr(struct gk20a *g, - struct nvgpu_mem *inst_block) -{ - return (u32)(nvgpu_inst_block_addr(g, inst_block) >> 12LL); -} - /* * map global circ_buf to the context space and store the GPU VA * in the context header. 
@@ -571,7 +566,7 @@ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, return -EINVAL; } - context_ptr = nvgpu_gr_fecs_trace_fecs_context_ptr(g, inst_block); + context_ptr = nvgpu_inst_block_ptr(g, inst_block); nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "pid=%d context_ptr=%x inst_block=%llx", @@ -629,7 +624,7 @@ int nvgpu_gr_fecs_trace_unbind_channel(struct gk20a *g, return -EINVAL; } - context_ptr = nvgpu_gr_fecs_trace_fecs_context_ptr(g, inst_block); + context_ptr = nvgpu_inst_block_ptr(g, inst_block); nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "context_ptr=%x", context_ptr); diff --git a/drivers/gpu/nvgpu/common/gr/obj_ctx.c b/drivers/gpu/nvgpu/common/gr/obj_ctx.c index 135622843..2b60e05d9 100644 --- a/drivers/gpu/nvgpu/common/gr/obj_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/obj_ctx.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c index 87f9510d5..f6f663b72 100644 --- a/drivers/gpu/nvgpu/common/mm/mm.c +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -152,6 +152,15 @@ u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block) } } +u32 nvgpu_inst_block_ptr(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + u64 addr = nvgpu_inst_block_addr(g, inst_block) >> + g->ops.ramin.base_shift(); + + nvgpu_assert(u64_hi32(addr) == 0U); + return u64_lo32(addr); +} + void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) { if (nvgpu_mem_is_valid(inst_block)) { diff --git a/drivers/gpu/nvgpu/common/perf/perf_gm20b.c b/drivers/gpu/nvgpu/common/perf/perf_gm20b.c index 263147ecf..6ff7dfc39 100644 --- a/drivers/gpu/nvgpu/common/perf/perf_gm20b.c +++ b/drivers/gpu/nvgpu/common/perf/perf_gm20b.c @@ -74,7 +74,7 @@ void gm20b_perf_enable_membuf(struct gk20a *g, u32 size, { u32 addr_lo; u32 addr_hi; - u64 inst_block_addr; + u32 inst_block_ptr; addr_lo = u64_lo32(buf_addr); addr_hi = u64_hi32(buf_addr); @@ -84,11 +84,10 @@ void 
gm20b_perf_enable_membuf(struct gk20a *g, u32 size, perf_pmasys_outbaseupper_ptr_f(addr_hi)); nvgpu_writel(g, perf_pmasys_outsize_r(), size); - inst_block_addr = nvgpu_inst_block_addr(g, inst_block) >> 12; + inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block); - nvgpu_assert(inst_block_addr <= U64(U32_MAX)); nvgpu_writel(g, perf_pmasys_mem_block_r(), - perf_pmasys_mem_block_base_f(U32(inst_block_addr)) | + perf_pmasys_mem_block_base_f(inst_block_ptr) | perf_pmasys_mem_block_valid_true_f() | nvgpu_aperture_mask(g, inst_block, perf_pmasys_mem_block_target_sys_ncoh_f(), diff --git a/drivers/gpu/nvgpu/common/perf/perf_gv11b.c b/drivers/gpu/nvgpu/common/perf/perf_gv11b.c index e6631555a..c6ef4c902 100644 --- a/drivers/gpu/nvgpu/common/perf/perf_gv11b.c +++ b/drivers/gpu/nvgpu/common/perf/perf_gv11b.c @@ -74,7 +74,7 @@ void gv11b_perf_enable_membuf(struct gk20a *g, u32 size, { u32 addr_lo; u32 addr_hi; - u64 inst_block_addr; + u32 inst_block_ptr; addr_lo = u64_lo32(buf_addr); addr_hi = u64_hi32(buf_addr); @@ -84,11 +84,10 @@ void gv11b_perf_enable_membuf(struct gk20a *g, u32 size, perf_pmasys_outbaseupper_ptr_f(addr_hi)); nvgpu_writel(g, perf_pmasys_outsize_r(), size); - inst_block_addr = nvgpu_inst_block_addr(g, inst_block) >> 12; + inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block); - nvgpu_assert(inst_block_addr <= U64(U32_MAX)); nvgpu_writel(g, perf_pmasys_mem_block_r(), - perf_pmasys_mem_block_base_f(U32(inst_block_addr)) | + perf_pmasys_mem_block_base_f(inst_block_ptr) | perf_pmasys_mem_block_valid_true_f() | nvgpu_aperture_mask(g, inst_block, perf_pmasys_mem_block_target_sys_ncoh_f(), diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.c b/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.c index fee649b24..8af730620 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_gk20a.c @@ -194,17 +194,16 @@ int pmu_bootstrap(struct nvgpu_pmu *pmu) u32 addr_code, addr_data, addr_load; u32 i, blocks, addr_args; int err; - u64 tmp_addr; + u32 
inst_block_ptr; nvgpu_log_fn(g, " "); gk20a_writel(g, pwr_falcon_itfen_r(), gk20a_readl(g, pwr_falcon_itfen_r()) | pwr_falcon_itfen_ctxen_enable_f()); - tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12; - nvgpu_assert(u64_hi32(tmp_addr) == 0U); + inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->pmu.inst_block); gk20a_writel(g, pwr_pmu_new_instblk_r(), - pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | + pwr_pmu_new_instblk_ptr_f(inst_block_ptr) | pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_target_sys_coh_f()); diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_gm20b.c b/drivers/gpu/nvgpu/common/pmu/pmu_gm20b.c index 1291313b7..2b8e0b010 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_gm20b.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_gm20b.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -356,7 +357,7 @@ void gm20b_update_lspmu_cmdline_args(struct gk20a *g) void gm20b_pmu_flcn_setup_boot_config(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; - u64 tmp_addr; + u32 inst_block_ptr; nvgpu_log_fn(g, " "); @@ -377,11 +378,10 @@ void gm20b_pmu_flcn_setup_boot_config(struct gk20a *g) * The instance block address to write is the lower 32-bits of the 4K- * aligned physical instance block address. */ - tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12U; - nvgpu_assert(u64_hi32(tmp_addr) == 0U); + inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->pmu.inst_block); gk20a_writel(g, pwr_pmu_new_instblk_r(), - pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | + pwr_pmu_new_instblk_ptr_f(inst_block_ptr) | pwr_pmu_new_instblk_valid_f(1U) | (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? 
pwr_pmu_new_instblk_target_sys_coh_f() : diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_gp106.c b/drivers/gpu/nvgpu/common/pmu/pmu_gp106.c index 850901923..34641e522 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_gp106.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_gp106.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -204,7 +205,7 @@ void gp106_update_lspmu_cmdline_args(struct gk20a *g) void gp106_pmu_setup_apertures(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; - u64 tmp_addr; + u32 inst_block_ptr; /* PMU TRANSCFG */ /* setup apertures - virtual */ @@ -228,10 +229,9 @@ void gp106_pmu_setup_apertures(struct gk20a *g) gk20a_writel(g, pwr_falcon_itfen_r(), gk20a_readl(g, pwr_falcon_itfen_r()) | pwr_falcon_itfen_ctxen_enable_f()); - tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12; - nvgpu_assert(u64_hi32(tmp_addr) == 0U); + inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->pmu.inst_block); gk20a_writel(g, pwr_pmu_new_instblk_r(), - pwr_pmu_new_instblk_ptr_f(U32(tmp_addr)) | + pwr_pmu_new_instblk_ptr_f(inst_block_ptr) | pwr_pmu_new_instblk_valid_f(1) | nvgpu_aperture_mask(g, &mm->pmu.inst_block, pwr_pmu_new_instblk_target_sys_ncoh_f(), diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_gv11b.c b/drivers/gpu/nvgpu/common/pmu/pmu_gv11b.c index fc41b0839..2df840a10 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_gv11b.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -152,7 +153,7 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu) u32 addr_code_hi, addr_data_hi; u32 i, blocks, addr_args; int err; - u64 tmp_addr; + u32 inst_block_ptr; nvgpu_log_fn(g, " "); @@ -160,10 +161,9 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu) gk20a_readl(g, pwr_falcon_itfen_r()) | pwr_falcon_itfen_ctxen_enable_f()); - tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB; - nvgpu_assert(u64_hi32(tmp_addr) == 0U); + inst_block_ptr = nvgpu_inst_block_ptr(g, 
&mm->pmu.inst_block); gk20a_writel(g, pwr_pmu_new_instblk_r(), - pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | + pwr_pmu_new_instblk_ptr_f(inst_block_ptr) | pwr_pmu_new_instblk_valid_f(1) | (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? pwr_pmu_new_instblk_target_sys_coh_f() : diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c index 88c7f5311..e1cd792e9 100644 --- a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c @@ -44,8 +44,6 @@ #include "common/vgpu/gr/subctx_vgpu.h" #include "common/vgpu/ivc/comm_vgpu.h" -#include - void vgpu_channel_bind(struct channel_gk20a *ch) { struct tegra_vgpu_cmd_msg msg; diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index 825823dc4..3c913b587 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -506,6 +506,8 @@ static const struct gpu_ops vgpu_gp10b_ops = { .init_pdb = gp10b_ramin_init_pdb, .init_subctx_pdb = NULL, .set_adr_limit = gk20a_ramin_set_adr_limit, + .base_shift = gk20a_ramin_base_shift, + .alloc_size = gk20a_ramin_alloc_size, }, .runlist = { .reschedule = NULL, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 4bc010345..5d87079e9 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -26,6 +26,7 @@ #include "hal/fifo/pbdma_gm20b.h" #include "hal/fifo/pbdma_gp10b.h" #include "hal/fifo/pbdma_gv11b.h" +#include "hal/fifo/ramin_gk20a.h" #include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gp10b.h" #include "hal/fifo/ramin_gv11b.h" @@ -592,6 +593,8 @@ static const struct gpu_ops vgpu_gv11b_ops = { .init_pdb = gp10b_ramin_init_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .set_adr_limit = NULL, + .base_shift = 
gk20a_ramin_base_shift, + .alloc_size = gk20a_ramin_alloc_size, }, .runlist = { .reschedule = NULL, diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index bfd3c4f72..132606399 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c @@ -39,7 +39,6 @@ #include #include -#include #include #include #include diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 5c3dd2bee..aef55cf05 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -63,7 +63,6 @@ #include #include -#include #include #define FECS_METHOD_WFI_RESTORE 0x80000U @@ -430,10 +429,12 @@ static bool gk20a_fifo_handle_mmu_fault_locked( ch->chid); gk20a_channel_put(ch); } else if (mmfault_info.inst_ptr == - nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) { + nvgpu_inst_block_addr(g, + &g->mm.bar1.inst_block)) { nvgpu_err(g, "mmu fault from bar1"); } else if (mmfault_info.inst_ptr == - nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) { + nvgpu_inst_block_addr(g, + &g->mm.pmu.inst_block)) { nvgpu_err(g, "mmu fault from pmu"); } else { nvgpu_err(g, "couldn't locate channel for mmu fault"); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index c0984c067..f8a3ad5ee 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -70,7 +70,6 @@ #include #include -#include static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( struct gk20a *g, u32 curr_ctx, u32 *curr_tsgid); @@ -1264,8 +1263,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( continue; } - if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >> - ram_in_base_shift_v()) == + if (nvgpu_inst_block_ptr(g, &ch->inst_block) == g->ops.gr.falcon.get_ctx_ptr(curr_ctx)) { tsgid = ch->tsgid; /* found it */ diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 561523722..3d6cd3d27 100644 --- 
a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -50,7 +50,6 @@ #include "fence_gk20a.h" #include -#include #include #include @@ -397,7 +396,7 @@ int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) nvgpu_log_fn(g, " "); - err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block); + err = nvgpu_dma_alloc(g, g->ops.ramin.alloc_size(), inst_block); if (err != 0) { nvgpu_err(g, "%s: memory allocation failed", __func__); return err; diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index c8bf251d3..802ae3037 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -711,6 +711,8 @@ static const struct gpu_ops gm20b_ops = { .init_pdb = gk20a_ramin_init_pdb, .init_subctx_pdb = NULL, .set_adr_limit = gk20a_ramin_set_adr_limit, + .base_shift = gk20a_ramin_base_shift, + .alloc_size = gk20a_ramin_alloc_size, }, .runlist = { .update_for_channel = gk20a_runlist_update_for_channel, diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c index cb0ce7a02..d402729da 100644 --- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c +++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c @@ -50,7 +50,7 @@ int gp106_sec2_reset(struct gk20a *g) void gp106_sec2_flcn_setup_boot_config(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; - u64 tmp_addr; + u32 inst_block_ptr; u32 data = 0U; nvgpu_log_fn(g, " "); @@ -85,11 +85,10 @@ void gp106_sec2_flcn_setup_boot_config(struct gk20a *g) * The instance block address to write is the lower 32-bits of the 4K- * aligned physical instance block address. 
*/ - tmp_addr = nvgpu_inst_block_addr(g, &mm->sec2.inst_block) >> 12U; - nvgpu_assert(u64_hi32(tmp_addr) == 0U); + inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->sec2.inst_block); gk20a_writel(g, psec_falcon_nxtctx_r(), - pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | + pwr_pmu_new_instblk_ptr_f(inst_block_ptr) | pwr_pmu_new_instblk_valid_f(1U) | nvgpu_aperture_mask(g, &mm->sec2.inst_block, pwr_pmu_new_instblk_target_sys_ncoh_f(), diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index ed970d5e9..a909dc77e 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -800,6 +800,8 @@ static const struct gpu_ops gp10b_ops = { .init_pdb = gp10b_ramin_init_pdb, .init_subctx_pdb = NULL, .set_adr_limit = gk20a_ramin_set_adr_limit, + .base_shift = gk20a_ramin_base_shift, + .alloc_size = gk20a_ramin_alloc_size, }, .runlist = { .reschedule = gk20a_runlist_reschedule, diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 5e2cbe17b..5f982fade 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -34,7 +34,6 @@ #include "gm20b/mm_gm20b.h" #include "mm_gp10b.h" -#include #include u32 gp10b_mm_get_default_big_page_size(void) diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 60f01a473..111a06b20 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -59,6 +59,7 @@ #include "hal/fifo/pbdma_status_gm20b.h" #include "hal/fifo/ramfc_gp10b.h" #include "hal/fifo/ramfc_gv11b.h" +#include "hal/fifo/ramin_gk20a.h" #include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gp10b.h" #include "hal/fifo/ramin_gv11b.h" @@ -984,6 +985,8 @@ static const struct gpu_ops gv100_ops = { .init_pdb = gp10b_ramin_init_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .set_adr_limit = NULL, + .base_shift = gk20a_ramin_base_shift, + .alloc_size = gk20a_ramin_alloc_size, }, .runlist = 
{ .update_for_channel = gk20a_runlist_update_for_channel, diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index cfc28455a..784815699 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100U diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index bef7ae14b..4a343573a 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -60,6 +60,7 @@ #include "hal/fifo/engines_gv11b.h" #include "hal/fifo/ramfc_gp10b.h" #include "hal/fifo/ramfc_gv11b.h" +#include "hal/fifo/ramin_gk20a.h" #include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gp10b.h" #include "hal/fifo/ramin_gv11b.h" @@ -939,6 +940,8 @@ static const struct gpu_ops gv11b_ops = { .init_pdb = gp10b_ramin_init_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .set_adr_limit = NULL, + .base_shift = gk20a_ramin_base_shift, + .alloc_size = gk20a_ramin_alloc_size, }, .runlist = { .reschedule = gv11b_runlist_reschedule, diff --git a/drivers/gpu/nvgpu/hal/fifo/ramin_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/ramin_gk20a.c index 77ab3d5a3..cdb06d586 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramin_gk20a.c +++ b/drivers/gpu/nvgpu/hal/fifo/ramin_gk20a.c @@ -72,3 +72,13 @@ void gk20a_ramin_set_adr_limit(struct gk20a *g, nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(), ram_in_adr_limit_hi_f(u64_hi32(va_limit - 1U))); } + +u32 gk20a_ramin_base_shift(void) +{ + return ram_in_base_shift_v(); +} + +u32 gk20a_ramin_alloc_size(void) +{ + return ram_in_alloc_size_v(); +} diff --git a/drivers/gpu/nvgpu/hal/fifo/ramin_gk20a.h b/drivers/gpu/nvgpu/hal/fifo/ramin_gk20a.h index b065a1267..c67c9ca5f 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramin_gk20a.h +++ b/drivers/gpu/nvgpu/hal/fifo/ramin_gk20a.h @@ -33,5 +33,7 @@ void gk20a_ramin_init_pdb(struct gk20a *g, struct nvgpu_mem 
*inst_block, u64 pdb_addr, struct nvgpu_mem *pdb_mem); void gk20a_ramin_set_adr_limit(struct gk20a *g, struct nvgpu_mem *inst_block, u64 va_limit); +u32 gk20a_ramin_base_shift(void); +u32 gk20a_ramin_alloc_size(void); #endif /* NVGPU_RAMIN_GK20A_H */ diff --git a/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c b/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c index cd19dc362..eb49d1142 100644 --- a/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c +++ b/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -44,7 +45,7 @@ int gv100_gsp_reset(struct gk20a *g) void gv100_gsp_flcn_setup_boot_config(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; - u64 tmp_addr; + u32 inst_block_ptr; u32 data = 0; data = gk20a_readl(g, pgsp_fbif_ctl_r()); @@ -77,11 +78,10 @@ void gv100_gsp_flcn_setup_boot_config(struct gk20a *g) * The instance block address to write is the lower 32-bits of the 4K- * aligned physical instance block address. */ - tmp_addr = nvgpu_inst_block_addr(g, &mm->gsp.inst_block) >> 12U; - nvgpu_assert(u64_hi32(tmp_addr) == 0U); + inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->gsp.inst_block); gk20a_writel(g, pgsp_falcon_nxtctx_r(), - pgsp_falcon_nxtctx_ctxptr_f((u32)tmp_addr) | + pgsp_falcon_nxtctx_ctxptr_f(inst_block_ptr) | pgsp_falcon_nxtctx_ctxvalid_f(1) | nvgpu_aperture_mask(g, &mm->gsp.inst_block, pgsp_falcon_nxtctx_ctxtgt_sys_ncoh_f(), diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 89e4a42ef..b1d17d69c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1033,6 +1033,8 @@ struct gpu_ops { bool replayable); void (*set_adr_limit)(struct gk20a *g, struct nvgpu_mem *inst_block, u64 va_limit); + u32 (*base_shift)(void); + u32 (*alloc_size)(void); } ramin; struct { int (*reschedule)(struct channel_gk20a *ch, bool preempt_next); diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h index ca045da27..65f5458fd 
100644 --- a/drivers/gpu/nvgpu/include/nvgpu/mm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h @@ -214,6 +214,7 @@ int nvgpu_init_mm_support(struct gk20a *g); int nvgpu_init_mm_setup_hw(struct gk20a *g); u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block); +u32 nvgpu_inst_block_ptr(struct gk20a *g, struct nvgpu_mem *inst_block); void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); int nvgpu_mm_suspend(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/libnvgpu-drv.export b/drivers/gpu/nvgpu/libnvgpu-drv.export index dc362aa07..1ca36dd5b 100644 --- a/drivers/gpu/nvgpu/libnvgpu-drv.export +++ b/drivers/gpu/nvgpu/libnvgpu-drv.export @@ -30,6 +30,7 @@ gk20a_runlist_get_ch_entry gk20a_runlist_get_tsg_entry gk20a_locked_gmmu_map gk20a_locked_gmmu_unmap +gk20a_ramin_alloc_size gm20b_fb_tlb_invalidate gm20b_fuse_status_opt_gpc gm20b_ramin_set_big_page_size diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index 39c223b73..33aacf4ad 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 37f5b9cd1..5541be9b4 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -64,6 +64,7 @@ #include "hal/fifo/ramfc_gp10b.h" #include "hal/fifo/ramfc_gv11b.h" #include "hal/fifo/ramfc_tu104.h" +#include "hal/fifo/ramin_gk20a.h" #include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gp10b.h" #include "hal/fifo/ramin_gv11b.h" @@ -1021,6 +1022,8 @@ static const struct gpu_ops tu104_ops = { .init_pdb = gp10b_ramin_init_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .set_adr_limit = NULL, + .base_shift = gk20a_ramin_base_shift, + .alloc_size = gk20a_ramin_alloc_size, }, .runlist = { .update_for_channel = gk20a_runlist_update_for_channel, diff --git 
a/drivers/gpu/nvgpu/tu104/sec2_tu104.c b/drivers/gpu/nvgpu/tu104/sec2_tu104.c index d8b2438fd..c6b10a646 100644 --- a/drivers/gpu/nvgpu/tu104/sec2_tu104.c +++ b/drivers/gpu/nvgpu/tu104/sec2_tu104.c @@ -176,7 +176,7 @@ int tu104_sec2_flcn_copy_from_emem(struct gk20a *g, void tu104_sec2_flcn_setup_boot_config(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; - u64 tmp_addr; + u32 inst_block_ptr; u32 data = 0U; nvgpu_log_fn(g, " "); @@ -208,14 +208,13 @@ void tu104_sec2_flcn_setup_boot_config(struct gk20a *g) psec_falcon_itfen_ctxen_enable_f()); /* - * The instance block address to write is the lower 32-bits of the 4K- - * aligned physical instance block address. - */ - tmp_addr = nvgpu_inst_block_addr(g, &mm->sec2.inst_block) >> 12U; - nvgpu_assert(u64_hi32(tmp_addr) == 0U); + * The instance block address to write is the lower 32-bits of the 4K- + * aligned physical instance block address. + */ + inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->sec2.inst_block); gk20a_writel(g, psec_falcon_nxtctx_r(), - pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | + pwr_pmu_new_instblk_ptr_f(inst_block_ptr) | pwr_pmu_new_instblk_valid_f(1U) | nvgpu_aperture_mask(g, &mm->sec2.inst_block, pwr_pmu_new_instblk_target_sys_ncoh_f(), diff --git a/userspace/units/mm/gmmu/page_table/page_table.c b/userspace/units/mm/gmmu/page_table/page_table.c index 1d6de6a5a..a53264c99 100644 --- a/userspace/units/mm/gmmu/page_table/page_table.c +++ b/userspace/units/mm/gmmu/page_table/page_table.c @@ -43,6 +43,7 @@ #include #include +#include #include #define TEST_PA_ADDRESS 0xEFAD80000000 @@ -305,6 +306,7 @@ static int init_mm(struct unit_module *m, struct gk20a *g) g->ops.fb.compression_page_size = gp10b_fb_compression_page_size; g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate; g->ops.ramin.init_pdb = gp10b_ramin_init_pdb; + g->ops.ramin.alloc_size = gk20a_ramin_alloc_size; if (g->ops.mm.is_bar1_supported(g)) { unit_return_fail(m, "BAR1 is not supported on Volta+\n"); diff --git 
a/userspace/units/mm/page_table_faults/page_table_faults.c b/userspace/units/mm/page_table_faults/page_table_faults.c index 6900c4db3..9005f24e3 100644 --- a/userspace/units/mm/page_table_faults/page_table_faults.c +++ b/userspace/units/mm/page_table_faults/page_table_faults.c @@ -46,6 +46,7 @@ #include "hal/fb/fb_gp10b.h" #include "hal/fb/fb_gm20b.h" #include "hal/fb/fb_gv11b.h" +#include "hal/fifo/ramin_gk20a.h" #include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gp10b.h" @@ -123,6 +124,7 @@ static int init_mm(struct unit_module *m, struct gk20a *g) g->ops.fb.compression_page_size = gp10b_fb_compression_page_size; g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate; g->ops.ramin.init_pdb = gp10b_ramin_init_pdb; + g->ops.ramin.alloc_size = gk20a_ramin_alloc_size; /* New HALs for fault testing */ g->ops.mm.mmu_fault_pending = gv11b_mm_mmu_fault_pending;