gpu: nvgpu: add base_shift and alloc_size ramin HALs

Added the following HALs
- ramin.base_shift
- ramin.alloc_size

Use above HALs in mm, instead of using hw definitions.

Defined nvgpu_inst_block_ptr to
- get inst_block address,
- shift it by base_shift
- assert upper 32 bits are 0
- return lower 32 bits

Added missing #include for <nvgpu/mm.h>

Jira NVGPU-3015

Change-Id: I558a6f4c9fbc6873a5b71f1557ea9ad8eae2778f
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2077840
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Thomas Fleury
2019-03-20 16:55:08 -07:00
committed by mobile promotions
parent 80b91ef2a5
commit 4ef4939797
35 changed files with 94 additions and 63 deletions

View File

@@ -25,6 +25,7 @@
#include <nvgpu/atomic.h> #include <nvgpu/atomic.h>
#include <nvgpu/io.h> #include <nvgpu/io.h>
#include <nvgpu/barrier.h> #include <nvgpu/barrier.h>
#include <nvgpu/mm.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include "channel_gm20b.h" #include "channel_gm20b.h"
@@ -36,13 +37,11 @@ void gm20b_channel_bind(struct channel_gk20a *c)
{ {
struct gk20a *g = c->g; struct gk20a *g = c->g;
u32 inst_ptr = nvgpu_inst_block_addr(g, &c->inst_block) u32 inst_ptr = nvgpu_inst_block_ptr(g, &c->inst_block);
>> ram_in_base_shift_v();
nvgpu_log_info(g, "bind channel %d inst ptr 0x%08x", nvgpu_log_info(g, "bind channel %d inst ptr 0x%08x",
c->chid, inst_ptr); c->chid, inst_ptr);
gk20a_writel(g, ccsr_channel_inst_r(c->chid), gk20a_writel(g, ccsr_channel_inst_r(c->chid),
ccsr_channel_inst_ptr_f(inst_ptr) | ccsr_channel_inst_ptr_f(inst_ptr) |
nvgpu_aperture_mask(g, &c->inst_block, nvgpu_aperture_mask(g, &c->inst_block,

View File

@@ -24,6 +24,7 @@
#include <nvgpu/list.h> #include <nvgpu/list.h>
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include <nvgpu/log2.h> #include <nvgpu/log2.h>
#include <nvgpu/mm.h>
#include <nvgpu/circ_buf.h> #include <nvgpu/circ_buf.h>
#include <nvgpu/timers.h> #include <nvgpu/timers.h>
#include <nvgpu/enabled.h> #include <nvgpu/enabled.h>
@@ -546,12 +547,6 @@ int nvgpu_gr_fecs_trace_reset(struct gk20a *g)
return g->ops.gr.fecs_trace.set_read_index(g, 0); return g->ops.gr.fecs_trace.set_read_index(g, 0);
} }
static u32 nvgpu_gr_fecs_trace_fecs_context_ptr(struct gk20a *g,
struct nvgpu_mem *inst_block)
{
return (u32)(nvgpu_inst_block_addr(g, inst_block) >> 12LL);
}
/* /*
* map global circ_buf to the context space and store the GPU VA * map global circ_buf to the context space and store the GPU VA
* in the context header. * in the context header.
@@ -571,7 +566,7 @@ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g,
return -EINVAL; return -EINVAL;
} }
context_ptr = nvgpu_gr_fecs_trace_fecs_context_ptr(g, inst_block); context_ptr = nvgpu_inst_block_ptr(g, inst_block);
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"pid=%d context_ptr=%x inst_block=%llx", "pid=%d context_ptr=%x inst_block=%llx",
@@ -629,7 +624,7 @@ int nvgpu_gr_fecs_trace_unbind_channel(struct gk20a *g,
return -EINVAL; return -EINVAL;
} }
context_ptr = nvgpu_gr_fecs_trace_fecs_context_ptr(g, inst_block); context_ptr = nvgpu_inst_block_ptr(g, inst_block);
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"context_ptr=%x", context_ptr); "context_ptr=%x", context_ptr);

View File

@@ -23,6 +23,7 @@
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include <nvgpu/io.h> #include <nvgpu/io.h>
#include <nvgpu/mm.h>
#include <nvgpu/gr/ctx.h> #include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/subctx.h> #include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/global_ctx.h> #include <nvgpu/gr/global_ctx.h>

View File

@@ -152,6 +152,15 @@ u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
} }
} }
/*
 * Return the instance block pointer value to program into HW registers:
 * the physical instance block address shifted right by the chip's
 * ramin base shift (via the ramin.base_shift HAL). The shifted value
 * must fit in 32 bits; the upper 32 bits are asserted to be zero and
 * only the lower 32 bits are returned.
 */
u32 nvgpu_inst_block_ptr(struct gk20a *g, struct nvgpu_mem *inst_block)
{
/* HAL replaces the former hard-coded ram_in_base_shift_v() usage */
u64 addr = nvgpu_inst_block_addr(g, inst_block) >>
g->ops.ramin.base_shift();
/* a non-zero high word would mean the ptr cannot be programmed */
nvgpu_assert(u64_hi32(addr) == 0U);
return u64_lo32(addr);
}
void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{ {
if (nvgpu_mem_is_valid(inst_block)) { if (nvgpu_mem_is_valid(inst_block)) {

View File

@@ -74,7 +74,7 @@ void gm20b_perf_enable_membuf(struct gk20a *g, u32 size,
{ {
u32 addr_lo; u32 addr_lo;
u32 addr_hi; u32 addr_hi;
u64 inst_block_addr; u32 inst_block_ptr;
addr_lo = u64_lo32(buf_addr); addr_lo = u64_lo32(buf_addr);
addr_hi = u64_hi32(buf_addr); addr_hi = u64_hi32(buf_addr);
@@ -84,11 +84,10 @@ void gm20b_perf_enable_membuf(struct gk20a *g, u32 size,
perf_pmasys_outbaseupper_ptr_f(addr_hi)); perf_pmasys_outbaseupper_ptr_f(addr_hi));
nvgpu_writel(g, perf_pmasys_outsize_r(), size); nvgpu_writel(g, perf_pmasys_outsize_r(), size);
inst_block_addr = nvgpu_inst_block_addr(g, inst_block) >> 12; inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block);
nvgpu_assert(inst_block_addr <= U64(U32_MAX));
nvgpu_writel(g, perf_pmasys_mem_block_r(), nvgpu_writel(g, perf_pmasys_mem_block_r(),
perf_pmasys_mem_block_base_f(U32(inst_block_addr)) | perf_pmasys_mem_block_base_f(inst_block_ptr) |
perf_pmasys_mem_block_valid_true_f() | perf_pmasys_mem_block_valid_true_f() |
nvgpu_aperture_mask(g, inst_block, nvgpu_aperture_mask(g, inst_block,
perf_pmasys_mem_block_target_sys_ncoh_f(), perf_pmasys_mem_block_target_sys_ncoh_f(),

View File

@@ -74,7 +74,7 @@ void gv11b_perf_enable_membuf(struct gk20a *g, u32 size,
{ {
u32 addr_lo; u32 addr_lo;
u32 addr_hi; u32 addr_hi;
u64 inst_block_addr; u32 inst_block_ptr;
addr_lo = u64_lo32(buf_addr); addr_lo = u64_lo32(buf_addr);
addr_hi = u64_hi32(buf_addr); addr_hi = u64_hi32(buf_addr);
@@ -84,11 +84,10 @@ void gv11b_perf_enable_membuf(struct gk20a *g, u32 size,
perf_pmasys_outbaseupper_ptr_f(addr_hi)); perf_pmasys_outbaseupper_ptr_f(addr_hi));
nvgpu_writel(g, perf_pmasys_outsize_r(), size); nvgpu_writel(g, perf_pmasys_outsize_r(), size);
inst_block_addr = nvgpu_inst_block_addr(g, inst_block) >> 12; inst_block_ptr = nvgpu_inst_block_ptr(g, inst_block);
nvgpu_assert(inst_block_addr <= U64(U32_MAX));
nvgpu_writel(g, perf_pmasys_mem_block_r(), nvgpu_writel(g, perf_pmasys_mem_block_r(),
perf_pmasys_mem_block_base_f(U32(inst_block_addr)) | perf_pmasys_mem_block_base_f(inst_block_ptr) |
perf_pmasys_mem_block_valid_true_f() | perf_pmasys_mem_block_valid_true_f() |
nvgpu_aperture_mask(g, inst_block, nvgpu_aperture_mask(g, inst_block,
perf_pmasys_mem_block_target_sys_ncoh_f(), perf_pmasys_mem_block_target_sys_ncoh_f(),

View File

@@ -194,17 +194,16 @@ int pmu_bootstrap(struct nvgpu_pmu *pmu)
u32 addr_code, addr_data, addr_load; u32 addr_code, addr_data, addr_load;
u32 i, blocks, addr_args; u32 i, blocks, addr_args;
int err; int err;
u64 tmp_addr; u32 inst_block_ptr;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
gk20a_writel(g, pwr_falcon_itfen_r(), gk20a_writel(g, pwr_falcon_itfen_r(),
gk20a_readl(g, pwr_falcon_itfen_r()) | gk20a_readl(g, pwr_falcon_itfen_r()) |
pwr_falcon_itfen_ctxen_enable_f()); pwr_falcon_itfen_ctxen_enable_f());
tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12; inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->pmu.inst_block);
nvgpu_assert(u64_hi32(tmp_addr) == 0U);
gk20a_writel(g, pwr_pmu_new_instblk_r(), gk20a_writel(g, pwr_pmu_new_instblk_r(),
pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | pwr_pmu_new_instblk_ptr_f(inst_block_ptr) |
pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_valid_f(1) |
pwr_pmu_new_instblk_target_sys_coh_f()); pwr_pmu_new_instblk_target_sys_coh_f());

View File

@@ -24,6 +24,7 @@
#include <nvgpu/timers.h> #include <nvgpu/timers.h>
#include <nvgpu/pmu.h> #include <nvgpu/pmu.h>
#include <nvgpu/mm.h>
#include <nvgpu/fuse.h> #include <nvgpu/fuse.h>
#include <nvgpu/enabled.h> #include <nvgpu/enabled.h>
#include <nvgpu/io.h> #include <nvgpu/io.h>
@@ -356,7 +357,7 @@ void gm20b_update_lspmu_cmdline_args(struct gk20a *g)
void gm20b_pmu_flcn_setup_boot_config(struct gk20a *g) void gm20b_pmu_flcn_setup_boot_config(struct gk20a *g)
{ {
struct mm_gk20a *mm = &g->mm; struct mm_gk20a *mm = &g->mm;
u64 tmp_addr; u32 inst_block_ptr;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
@@ -377,11 +378,10 @@ void gm20b_pmu_flcn_setup_boot_config(struct gk20a *g)
* The instance block address to write is the lower 32-bits of the 4K- * The instance block address to write is the lower 32-bits of the 4K-
* aligned physical instance block address. * aligned physical instance block address.
*/ */
tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12U; inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->pmu.inst_block);
nvgpu_assert(u64_hi32(tmp_addr) == 0U);
gk20a_writel(g, pwr_pmu_new_instblk_r(), gk20a_writel(g, pwr_pmu_new_instblk_r(),
pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | pwr_pmu_new_instblk_ptr_f(inst_block_ptr) |
pwr_pmu_new_instblk_valid_f(1U) | pwr_pmu_new_instblk_valid_f(1U) |
(nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
pwr_pmu_new_instblk_target_sys_coh_f() : pwr_pmu_new_instblk_target_sys_coh_f() :

View File

@@ -23,6 +23,7 @@
#include <nvgpu/pmu.h> #include <nvgpu/pmu.h>
#include <nvgpu/io.h> #include <nvgpu/io.h>
#include <nvgpu/clk_arb.h> #include <nvgpu/clk_arb.h>
#include <nvgpu/mm.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/pmu/lpwr.h> #include <nvgpu/pmu/lpwr.h>
#include <nvgpu/pmu/cmd.h> #include <nvgpu/pmu/cmd.h>
@@ -204,7 +205,7 @@ void gp106_update_lspmu_cmdline_args(struct gk20a *g)
void gp106_pmu_setup_apertures(struct gk20a *g) void gp106_pmu_setup_apertures(struct gk20a *g)
{ {
struct mm_gk20a *mm = &g->mm; struct mm_gk20a *mm = &g->mm;
u64 tmp_addr; u32 inst_block_ptr;
/* PMU TRANSCFG */ /* PMU TRANSCFG */
/* setup apertures - virtual */ /* setup apertures - virtual */
@@ -228,10 +229,9 @@ void gp106_pmu_setup_apertures(struct gk20a *g)
gk20a_writel(g, pwr_falcon_itfen_r(), gk20a_writel(g, pwr_falcon_itfen_r(),
gk20a_readl(g, pwr_falcon_itfen_r()) | gk20a_readl(g, pwr_falcon_itfen_r()) |
pwr_falcon_itfen_ctxen_enable_f()); pwr_falcon_itfen_ctxen_enable_f());
tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12; inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->pmu.inst_block);
nvgpu_assert(u64_hi32(tmp_addr) == 0U);
gk20a_writel(g, pwr_pmu_new_instblk_r(), gk20a_writel(g, pwr_pmu_new_instblk_r(),
pwr_pmu_new_instblk_ptr_f(U32(tmp_addr)) | pwr_pmu_new_instblk_ptr_f(inst_block_ptr) |
pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_valid_f(1) |
nvgpu_aperture_mask(g, &mm->pmu.inst_block, nvgpu_aperture_mask(g, &mm->pmu.inst_block,
pwr_pmu_new_instblk_target_sys_ncoh_f(), pwr_pmu_new_instblk_target_sys_ncoh_f(),

View File

@@ -26,6 +26,7 @@
#include <nvgpu/falcon.h> #include <nvgpu/falcon.h>
#include <nvgpu/io.h> #include <nvgpu/io.h>
#include <nvgpu/utils.h> #include <nvgpu/utils.h>
#include <nvgpu/mm.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_err.h> #include <nvgpu/nvgpu_err.h>
#include <nvgpu/firmware.h> #include <nvgpu/firmware.h>
@@ -152,7 +153,7 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu)
u32 addr_code_hi, addr_data_hi; u32 addr_code_hi, addr_data_hi;
u32 i, blocks, addr_args; u32 i, blocks, addr_args;
int err; int err;
u64 tmp_addr; u32 inst_block_ptr;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
@@ -160,10 +161,9 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu)
gk20a_readl(g, pwr_falcon_itfen_r()) | gk20a_readl(g, pwr_falcon_itfen_r()) |
pwr_falcon_itfen_ctxen_enable_f()); pwr_falcon_itfen_ctxen_enable_f());
tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB; inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->pmu.inst_block);
nvgpu_assert(u64_hi32(tmp_addr) == 0U);
gk20a_writel(g, pwr_pmu_new_instblk_r(), gk20a_writel(g, pwr_pmu_new_instblk_r(),
pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | pwr_pmu_new_instblk_ptr_f(inst_block_ptr) |
pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_valid_f(1) |
(nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
pwr_pmu_new_instblk_target_sys_coh_f() : pwr_pmu_new_instblk_target_sys_coh_f() :

View File

@@ -44,8 +44,6 @@
#include "common/vgpu/gr/subctx_vgpu.h" #include "common/vgpu/gr/subctx_vgpu.h"
#include "common/vgpu/ivc/comm_vgpu.h" #include "common/vgpu/ivc/comm_vgpu.h"
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
void vgpu_channel_bind(struct channel_gk20a *ch) void vgpu_channel_bind(struct channel_gk20a *ch)
{ {
struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_cmd_msg msg;

View File

@@ -506,6 +506,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.init_pdb = gp10b_ramin_init_pdb, .init_pdb = gp10b_ramin_init_pdb,
.init_subctx_pdb = NULL, .init_subctx_pdb = NULL,
.set_adr_limit = gk20a_ramin_set_adr_limit, .set_adr_limit = gk20a_ramin_set_adr_limit,
.base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size,
}, },
.runlist = { .runlist = {
.reschedule = NULL, .reschedule = NULL,

View File

@@ -26,6 +26,7 @@
#include "hal/fifo/pbdma_gm20b.h" #include "hal/fifo/pbdma_gm20b.h"
#include "hal/fifo/pbdma_gp10b.h" #include "hal/fifo/pbdma_gp10b.h"
#include "hal/fifo/pbdma_gv11b.h" #include "hal/fifo/pbdma_gv11b.h"
#include "hal/fifo/ramin_gk20a.h"
#include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gm20b.h"
#include "hal/fifo/ramin_gp10b.h" #include "hal/fifo/ramin_gp10b.h"
#include "hal/fifo/ramin_gv11b.h" #include "hal/fifo/ramin_gv11b.h"
@@ -592,6 +593,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.init_pdb = gp10b_ramin_init_pdb, .init_pdb = gp10b_ramin_init_pdb,
.init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb,
.set_adr_limit = NULL, .set_adr_limit = NULL,
.base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size,
}, },
.runlist = { .runlist = {
.reschedule = NULL, .reschedule = NULL,

View File

@@ -39,7 +39,6 @@
#include <nvgpu/hw/gk20a/hw_ce2_gk20a.h> #include <nvgpu/hw/gk20a/hw_ce2_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_top_gk20a.h> #include <nvgpu/hw/gk20a/hw_top_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/barrier.h> #include <nvgpu/barrier.h>

View File

@@ -63,7 +63,6 @@
#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h> #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#define FECS_METHOD_WFI_RESTORE 0x80000U #define FECS_METHOD_WFI_RESTORE 0x80000U
@@ -430,10 +429,12 @@ static bool gk20a_fifo_handle_mmu_fault_locked(
ch->chid); ch->chid);
gk20a_channel_put(ch); gk20a_channel_put(ch);
} else if (mmfault_info.inst_ptr == } else if (mmfault_info.inst_ptr ==
nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) { nvgpu_inst_block_addr(g,
&g->mm.bar1.inst_block)) {
nvgpu_err(g, "mmu fault from bar1"); nvgpu_err(g, "mmu fault from bar1");
} else if (mmfault_info.inst_ptr == } else if (mmfault_info.inst_ptr ==
nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) { nvgpu_inst_block_addr(g,
&g->mm.pmu.inst_block)) {
nvgpu_err(g, "mmu fault from pmu"); nvgpu_err(g, "mmu fault from pmu");
} else { } else {
nvgpu_err(g, "couldn't locate channel for mmu fault"); nvgpu_err(g, "couldn't locate channel for mmu fault");

View File

@@ -70,7 +70,6 @@
#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h> #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
struct gk20a *g, u32 curr_ctx, u32 *curr_tsgid); struct gk20a *g, u32 curr_ctx, u32 *curr_tsgid);
@@ -1264,8 +1263,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
continue; continue;
} }
if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >> if (nvgpu_inst_block_ptr(g, &ch->inst_block) ==
ram_in_base_shift_v()) ==
g->ops.gr.falcon.get_ctx_ptr(curr_ctx)) { g->ops.gr.falcon.get_ctx_ptr(curr_ctx)) {
tsgid = ch->tsgid; tsgid = ch->tsgid;
/* found it */ /* found it */

View File

@@ -50,7 +50,6 @@
#include "fence_gk20a.h" #include "fence_gk20a.h"
#include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h> #include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pram_gk20a.h> #include <nvgpu/hw/gk20a/hw_pram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_flush_gk20a.h> #include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
@@ -397,7 +396,7 @@ int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block); err = nvgpu_dma_alloc(g, g->ops.ramin.alloc_size(), inst_block);
if (err != 0) { if (err != 0) {
nvgpu_err(g, "%s: memory allocation failed", __func__); nvgpu_err(g, "%s: memory allocation failed", __func__);
return err; return err;

View File

@@ -711,6 +711,8 @@ static const struct gpu_ops gm20b_ops = {
.init_pdb = gk20a_ramin_init_pdb, .init_pdb = gk20a_ramin_init_pdb,
.init_subctx_pdb = NULL, .init_subctx_pdb = NULL,
.set_adr_limit = gk20a_ramin_set_adr_limit, .set_adr_limit = gk20a_ramin_set_adr_limit,
.base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size,
}, },
.runlist = { .runlist = {
.update_for_channel = gk20a_runlist_update_for_channel, .update_for_channel = gk20a_runlist_update_for_channel,

View File

@@ -50,7 +50,7 @@ int gp106_sec2_reset(struct gk20a *g)
void gp106_sec2_flcn_setup_boot_config(struct gk20a *g) void gp106_sec2_flcn_setup_boot_config(struct gk20a *g)
{ {
struct mm_gk20a *mm = &g->mm; struct mm_gk20a *mm = &g->mm;
u64 tmp_addr; u32 inst_block_ptr;
u32 data = 0U; u32 data = 0U;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
@@ -85,11 +85,10 @@ void gp106_sec2_flcn_setup_boot_config(struct gk20a *g)
* The instance block address to write is the lower 32-bits of the 4K- * The instance block address to write is the lower 32-bits of the 4K-
* aligned physical instance block address. * aligned physical instance block address.
*/ */
tmp_addr = nvgpu_inst_block_addr(g, &mm->sec2.inst_block) >> 12U; inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->sec2.inst_block);
nvgpu_assert(u64_hi32(tmp_addr) == 0U);
gk20a_writel(g, psec_falcon_nxtctx_r(), gk20a_writel(g, psec_falcon_nxtctx_r(),
pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | pwr_pmu_new_instblk_ptr_f(inst_block_ptr) |
pwr_pmu_new_instblk_valid_f(1U) | pwr_pmu_new_instblk_valid_f(1U) |
nvgpu_aperture_mask(g, &mm->sec2.inst_block, nvgpu_aperture_mask(g, &mm->sec2.inst_block,
pwr_pmu_new_instblk_target_sys_ncoh_f(), pwr_pmu_new_instblk_target_sys_ncoh_f(),

View File

@@ -800,6 +800,8 @@ static const struct gpu_ops gp10b_ops = {
.init_pdb = gp10b_ramin_init_pdb, .init_pdb = gp10b_ramin_init_pdb,
.init_subctx_pdb = NULL, .init_subctx_pdb = NULL,
.set_adr_limit = gk20a_ramin_set_adr_limit, .set_adr_limit = gk20a_ramin_set_adr_limit,
.base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size,
}, },
.runlist = { .runlist = {
.reschedule = gk20a_runlist_reschedule, .reschedule = gk20a_runlist_reschedule,

View File

@@ -34,7 +34,6 @@
#include "gm20b/mm_gm20b.h" #include "gm20b/mm_gm20b.h"
#include "mm_gp10b.h" #include "mm_gp10b.h"
#include <nvgpu/hw/gp10b/hw_ram_gp10b.h>
#include <nvgpu/hw/gp10b/hw_gmmu_gp10b.h> #include <nvgpu/hw/gp10b/hw_gmmu_gp10b.h>
u32 gp10b_mm_get_default_big_page_size(void) u32 gp10b_mm_get_default_big_page_size(void)

View File

@@ -59,6 +59,7 @@
#include "hal/fifo/pbdma_status_gm20b.h" #include "hal/fifo/pbdma_status_gm20b.h"
#include "hal/fifo/ramfc_gp10b.h" #include "hal/fifo/ramfc_gp10b.h"
#include "hal/fifo/ramfc_gv11b.h" #include "hal/fifo/ramfc_gv11b.h"
#include "hal/fifo/ramin_gk20a.h"
#include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gm20b.h"
#include "hal/fifo/ramin_gp10b.h" #include "hal/fifo/ramin_gp10b.h"
#include "hal/fifo/ramin_gv11b.h" #include "hal/fifo/ramin_gv11b.h"
@@ -984,6 +985,8 @@ static const struct gpu_ops gv100_ops = {
.init_pdb = gp10b_ramin_init_pdb, .init_pdb = gp10b_ramin_init_pdb,
.init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb,
.set_adr_limit = NULL, .set_adr_limit = NULL,
.base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size,
}, },
.runlist = { .runlist = {
.update_for_channel = gk20a_runlist_update_for_channel, .update_for_channel = gk20a_runlist_update_for_channel,

View File

@@ -64,7 +64,6 @@
#include <nvgpu/hw/gv11b/hw_gr_gv11b.h> #include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
#include <nvgpu/hw/gv11b/hw_fifo_gv11b.h> #include <nvgpu/hw/gv11b/hw_fifo_gv11b.h>
#include <nvgpu/hw/gv11b/hw_proj_gv11b.h> #include <nvgpu/hw/gv11b/hw_proj_gv11b.h>
#include <nvgpu/hw/gv11b/hw_ram_gv11b.h>
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h> #include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100U #define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100U

View File

@@ -60,6 +60,7 @@
#include "hal/fifo/engines_gv11b.h" #include "hal/fifo/engines_gv11b.h"
#include "hal/fifo/ramfc_gp10b.h" #include "hal/fifo/ramfc_gp10b.h"
#include "hal/fifo/ramfc_gv11b.h" #include "hal/fifo/ramfc_gv11b.h"
#include "hal/fifo/ramin_gk20a.h"
#include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gm20b.h"
#include "hal/fifo/ramin_gp10b.h" #include "hal/fifo/ramin_gp10b.h"
#include "hal/fifo/ramin_gv11b.h" #include "hal/fifo/ramin_gv11b.h"
@@ -939,6 +940,8 @@ static const struct gpu_ops gv11b_ops = {
.init_pdb = gp10b_ramin_init_pdb, .init_pdb = gp10b_ramin_init_pdb,
.init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb,
.set_adr_limit = NULL, .set_adr_limit = NULL,
.base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size,
}, },
.runlist = { .runlist = {
.reschedule = gv11b_runlist_reschedule, .reschedule = gv11b_runlist_reschedule,

View File

@@ -72,3 +72,13 @@ void gk20a_ramin_set_adr_limit(struct gk20a *g,
nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(), nvgpu_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(),
ram_in_adr_limit_hi_f(u64_hi32(va_limit - 1U))); ram_in_adr_limit_hi_f(u64_hi32(va_limit - 1U)));
} }
/* ramin.base_shift HAL: right-shift applied to an instance block
 * address to form a HW "inst ptr" field (HW manual value). */
u32 gk20a_ramin_base_shift(void)
{
return ram_in_base_shift_v();
}
/* ramin.alloc_size HAL: size in bytes to allocate for an instance
 * block (HW manual value); used by gk20a_alloc_inst_block(). */
u32 gk20a_ramin_alloc_size(void)
{
return ram_in_alloc_size_v();
}

View File

@@ -33,5 +33,7 @@ void gk20a_ramin_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
u64 pdb_addr, struct nvgpu_mem *pdb_mem); u64 pdb_addr, struct nvgpu_mem *pdb_mem);
void gk20a_ramin_set_adr_limit(struct gk20a *g, void gk20a_ramin_set_adr_limit(struct gk20a *g,
struct nvgpu_mem *inst_block, u64 va_limit); struct nvgpu_mem *inst_block, u64 va_limit);
u32 gk20a_ramin_base_shift(void);
u32 gk20a_ramin_alloc_size(void);
#endif /* NVGPU_RAMIN_GK20A_H */ #endif /* NVGPU_RAMIN_GK20A_H */

View File

@@ -21,6 +21,7 @@
*/ */
#include <nvgpu/falcon.h> #include <nvgpu/falcon.h>
#include <nvgpu/mm.h>
#include <nvgpu/io.h> #include <nvgpu/io.h>
#include <nvgpu/timers.h> #include <nvgpu/timers.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
@@ -44,7 +45,7 @@ int gv100_gsp_reset(struct gk20a *g)
void gv100_gsp_flcn_setup_boot_config(struct gk20a *g) void gv100_gsp_flcn_setup_boot_config(struct gk20a *g)
{ {
struct mm_gk20a *mm = &g->mm; struct mm_gk20a *mm = &g->mm;
u64 tmp_addr; u32 inst_block_ptr;
u32 data = 0; u32 data = 0;
data = gk20a_readl(g, pgsp_fbif_ctl_r()); data = gk20a_readl(g, pgsp_fbif_ctl_r());
@@ -77,11 +78,10 @@ void gv100_gsp_flcn_setup_boot_config(struct gk20a *g)
* The instance block address to write is the lower 32-bits of the 4K- * The instance block address to write is the lower 32-bits of the 4K-
* aligned physical instance block address. * aligned physical instance block address.
*/ */
tmp_addr = nvgpu_inst_block_addr(g, &mm->gsp.inst_block) >> 12U; inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->gsp.inst_block);
nvgpu_assert(u64_hi32(tmp_addr) == 0U);
gk20a_writel(g, pgsp_falcon_nxtctx_r(), gk20a_writel(g, pgsp_falcon_nxtctx_r(),
pgsp_falcon_nxtctx_ctxptr_f((u32)tmp_addr) | pgsp_falcon_nxtctx_ctxptr_f(inst_block_ptr) |
pgsp_falcon_nxtctx_ctxvalid_f(1) | pgsp_falcon_nxtctx_ctxvalid_f(1) |
nvgpu_aperture_mask(g, &mm->gsp.inst_block, nvgpu_aperture_mask(g, &mm->gsp.inst_block,
pgsp_falcon_nxtctx_ctxtgt_sys_ncoh_f(), pgsp_falcon_nxtctx_ctxtgt_sys_ncoh_f(),

View File

@@ -1033,6 +1033,8 @@ struct gpu_ops {
bool replayable); bool replayable);
void (*set_adr_limit)(struct gk20a *g, void (*set_adr_limit)(struct gk20a *g,
struct nvgpu_mem *inst_block, u64 va_limit); struct nvgpu_mem *inst_block, u64 va_limit);
u32 (*base_shift)(void);
u32 (*alloc_size)(void);
} ramin; } ramin;
struct { struct {
int (*reschedule)(struct channel_gk20a *ch, bool preempt_next); int (*reschedule)(struct channel_gk20a *ch, bool preempt_next);

View File

@@ -214,6 +214,7 @@ int nvgpu_init_mm_support(struct gk20a *g);
int nvgpu_init_mm_setup_hw(struct gk20a *g); int nvgpu_init_mm_setup_hw(struct gk20a *g);
u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block); u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block);
u32 nvgpu_inst_block_ptr(struct gk20a *g, struct nvgpu_mem *inst_block);
void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
int nvgpu_mm_suspend(struct gk20a *g); int nvgpu_mm_suspend(struct gk20a *g);

View File

@@ -30,6 +30,7 @@ gk20a_runlist_get_ch_entry
gk20a_runlist_get_tsg_entry gk20a_runlist_get_tsg_entry
gk20a_locked_gmmu_map gk20a_locked_gmmu_map
gk20a_locked_gmmu_unmap gk20a_locked_gmmu_unmap
gk20a_ramin_alloc_size
gm20b_fb_tlb_invalidate gm20b_fb_tlb_invalidate
gm20b_fuse_status_opt_gpc gm20b_fuse_status_opt_gpc
gm20b_ramin_set_big_page_size gm20b_ramin_set_big_page_size

View File

@@ -31,6 +31,7 @@
#include <nvgpu/cond.h> #include <nvgpu/cond.h>
#include <nvgpu/debugger.h> #include <nvgpu/debugger.h>
#include <nvgpu/utils.h> #include <nvgpu/utils.h>
#include <nvgpu/mm.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/channel.h> #include <nvgpu/channel.h>
#include <nvgpu/tsg.h> #include <nvgpu/tsg.h>

View File

@@ -64,6 +64,7 @@
#include "hal/fifo/ramfc_gp10b.h" #include "hal/fifo/ramfc_gp10b.h"
#include "hal/fifo/ramfc_gv11b.h" #include "hal/fifo/ramfc_gv11b.h"
#include "hal/fifo/ramfc_tu104.h" #include "hal/fifo/ramfc_tu104.h"
#include "hal/fifo/ramin_gk20a.h"
#include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gm20b.h"
#include "hal/fifo/ramin_gp10b.h" #include "hal/fifo/ramin_gp10b.h"
#include "hal/fifo/ramin_gv11b.h" #include "hal/fifo/ramin_gv11b.h"
@@ -1021,6 +1022,8 @@ static const struct gpu_ops tu104_ops = {
.init_pdb = gp10b_ramin_init_pdb, .init_pdb = gp10b_ramin_init_pdb,
.init_subctx_pdb = gv11b_ramin_init_subctx_pdb, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb,
.set_adr_limit = NULL, .set_adr_limit = NULL,
.base_shift = gk20a_ramin_base_shift,
.alloc_size = gk20a_ramin_alloc_size,
}, },
.runlist = { .runlist = {
.update_for_channel = gk20a_runlist_update_for_channel, .update_for_channel = gk20a_runlist_update_for_channel,

View File

@@ -176,7 +176,7 @@ int tu104_sec2_flcn_copy_from_emem(struct gk20a *g,
void tu104_sec2_flcn_setup_boot_config(struct gk20a *g) void tu104_sec2_flcn_setup_boot_config(struct gk20a *g)
{ {
struct mm_gk20a *mm = &g->mm; struct mm_gk20a *mm = &g->mm;
u64 tmp_addr; u32 inst_block_ptr;
u32 data = 0U; u32 data = 0U;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
@@ -211,11 +211,10 @@ void tu104_sec2_flcn_setup_boot_config(struct gk20a *g)
* The instance block address to write is the lower 32-bits of the 4K- * The instance block address to write is the lower 32-bits of the 4K-
* aligned physical instance block address. * aligned physical instance block address.
*/ */
tmp_addr = nvgpu_inst_block_addr(g, &mm->sec2.inst_block) >> 12U; inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->sec2.inst_block);
nvgpu_assert(u64_hi32(tmp_addr) == 0U);
gk20a_writel(g, psec_falcon_nxtctx_r(), gk20a_writel(g, psec_falcon_nxtctx_r(),
pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | pwr_pmu_new_instblk_ptr_f(inst_block_ptr) |
pwr_pmu_new_instblk_valid_f(1U) | pwr_pmu_new_instblk_valid_f(1U) |
nvgpu_aperture_mask(g, &mm->sec2.inst_block, nvgpu_aperture_mask(g, &mm->sec2.inst_block,
pwr_pmu_new_instblk_target_sys_ncoh_f(), pwr_pmu_new_instblk_target_sys_ncoh_f(),

View File

@@ -43,6 +43,7 @@
#include <hal/fb/fb_gp10b.h> #include <hal/fb/fb_gp10b.h>
#include <hal/fb/fb_gm20b.h> #include <hal/fb/fb_gm20b.h>
#include <hal/fifo/ramin_gk20a.h>
#include <hal/fifo/ramin_gp10b.h> #include <hal/fifo/ramin_gp10b.h>
#define TEST_PA_ADDRESS 0xEFAD80000000 #define TEST_PA_ADDRESS 0xEFAD80000000
@@ -305,6 +306,7 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
g->ops.fb.compression_page_size = gp10b_fb_compression_page_size; g->ops.fb.compression_page_size = gp10b_fb_compression_page_size;
g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate; g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate;
g->ops.ramin.init_pdb = gp10b_ramin_init_pdb; g->ops.ramin.init_pdb = gp10b_ramin_init_pdb;
g->ops.ramin.alloc_size = gk20a_ramin_alloc_size;
if (g->ops.mm.is_bar1_supported(g)) { if (g->ops.mm.is_bar1_supported(g)) {
unit_return_fail(m, "BAR1 is not supported on Volta+\n"); unit_return_fail(m, "BAR1 is not supported on Volta+\n");

View File

@@ -46,6 +46,7 @@
#include "hal/fb/fb_gp10b.h" #include "hal/fb/fb_gp10b.h"
#include "hal/fb/fb_gm20b.h" #include "hal/fb/fb_gm20b.h"
#include "hal/fb/fb_gv11b.h" #include "hal/fb/fb_gv11b.h"
#include "hal/fifo/ramin_gk20a.h"
#include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gm20b.h"
#include "hal/fifo/ramin_gp10b.h" #include "hal/fifo/ramin_gp10b.h"
@@ -123,6 +124,7 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
g->ops.fb.compression_page_size = gp10b_fb_compression_page_size; g->ops.fb.compression_page_size = gp10b_fb_compression_page_size;
g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate; g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate;
g->ops.ramin.init_pdb = gp10b_ramin_init_pdb; g->ops.ramin.init_pdb = gp10b_ramin_init_pdb;
g->ops.ramin.alloc_size = gk20a_ramin_alloc_size;
/* New HALs for fault testing */ /* New HALs for fault testing */
g->ops.mm.mmu_fault_pending = gv11b_mm_mmu_fault_pending; g->ops.mm.mmu_fault_pending = gv11b_mm_mmu_fault_pending;