gpu: nvgpu: Per chip default big page size

Make the default big page size query a HAL op instead of a per-platform
constant. This allows querying the default big page size without
accessing the Linux-specific gk20a_platform structure.

JIRA NVGPU-38

Change-Id: Ibfbd1319764fdae5fdb06700fb64d23f6f3dd01a
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master/r/1507928
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Terje Bergstrom
2017-06-22 12:55:17 -07:00
committed by mobile promotions
parent 82c0c96290
commit 001c7c3185
12 changed files with 21 additions and 25 deletions

View File

@@ -49,8 +49,7 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
gk20a_dbg_fn(""); gk20a_dbg_fn("");
if (big_page_size == 0) { if (big_page_size == 0) {
big_page_size = big_page_size = g->ops.mm.get_default_big_page_size();
gk20a_get_platform(g->dev)->default_big_page_size;
} else { } else {
if (!is_power_of_2(big_page_size)) if (!is_power_of_2(big_page_size))
return -EINVAL; return -EINVAL;

View File

@@ -86,8 +86,6 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.is_railgated = nvgpu_pci_tegra_is_railgated, .is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate, .clk_round_rate = nvgpu_pci_clk_round_rate,
.default_big_page_size = SZ_64K,
.ch_wdt_timeout_ms = 7000, .ch_wdt_timeout_ms = 7000,
.honors_aperture = true, .honors_aperture = true,
@@ -121,8 +119,6 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.is_railgated = nvgpu_pci_tegra_is_railgated, .is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate, .clk_round_rate = nvgpu_pci_clk_round_rate,
.default_big_page_size = SZ_64K,
.ch_wdt_timeout_ms = 7000, .ch_wdt_timeout_ms = 7000,
.honors_aperture = true, .honors_aperture = true,
@@ -156,8 +152,6 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.is_railgated = nvgpu_pci_tegra_is_railgated, .is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate, .clk_round_rate = nvgpu_pci_clk_round_rate,
.default_big_page_size = SZ_64K,
.ch_wdt_timeout_ms = 7000, .ch_wdt_timeout_ms = 7000,
.honors_aperture = true, .honors_aperture = true,
@@ -191,8 +185,6 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.is_railgated = nvgpu_pci_tegra_is_railgated, .is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate, .clk_round_rate = nvgpu_pci_clk_round_rate,
.default_big_page_size = SZ_64K,
.ch_wdt_timeout_ms = 7000, .ch_wdt_timeout_ms = 7000,
.honors_aperture = true, .honors_aperture = true,

View File

@@ -924,8 +924,6 @@ struct gk20a_platform gm20b_tegra_platform = {
.force_reset_in_do_idle = false, .force_reset_in_do_idle = false,
.default_big_page_size = SZ_128K,
.ch_wdt_timeout_ms = 5000, .ch_wdt_timeout_ms = 5000,
.probe = gk20a_tegra_probe, .probe = gk20a_tegra_probe,

View File

@@ -398,8 +398,6 @@ struct gk20a_platform gp10b_tegra_platform = {
.dump_platform_dependencies = gk20a_tegra_debug_dump, .dump_platform_dependencies = gk20a_tegra_debug_dump,
.default_big_page_size = SZ_64K,
.has_cde = true, .has_cde = true,
.clk_round_rate = gp10b_round_clk_rate, .clk_round_rate = gp10b_round_clk_rate,

View File

@@ -1886,7 +1886,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
struct mm_gk20a *mm = &g->mm; struct mm_gk20a *mm = &g->mm;
int err; int err;
u32 virt_size; u32 virt_size;
u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; u32 big_page_size = g->ops.mm.get_default_big_page_size();
nvgpu_mutex_acquire(&g->dbg_sessions_lock); nvgpu_mutex_acquire(&g->dbg_sessions_lock);

View File

@@ -420,7 +420,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
gpu->compression_page_size = g->ops.fb.compression_page_size(g); gpu->compression_page_size = g->ops.fb.compression_page_size(g);
gpu->big_page_size = platform->default_big_page_size; gpu->big_page_size = g->ops.mm.get_default_big_page_size();
gpu->pde_coverage_bit_count = gpu->pde_coverage_bit_count =
g->ops.mm.get_mmu_levels(g, gpu->big_page_size)[0].lo_bit[0]; g->ops.mm.get_mmu_levels(g, gpu->big_page_size)[0].lo_bit[0];

View File

@@ -674,6 +674,7 @@ struct gpu_ops {
void (*set_big_page_size)(struct gk20a *g, void (*set_big_page_size)(struct gk20a *g,
struct nvgpu_mem *mem, int size); struct nvgpu_mem *mem, int size);
u32 (*get_big_page_sizes)(void); u32 (*get_big_page_sizes)(void);
u32 (*get_default_big_page_size)(void);
u32 (*get_physical_addr_bits)(struct gk20a *g); u32 (*get_physical_addr_bits)(struct gk20a *g);
int (*init_mm_setup_hw)(struct gk20a *g); int (*init_mm_setup_hw)(struct gk20a *g);
bool (*is_bar1_supported)(struct gk20a *g); bool (*is_bar1_supported)(struct gk20a *g);

View File

@@ -2124,7 +2124,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
int err; int err;
struct gk20a *g = gk20a_from_mm(mm); struct gk20a *g = gk20a_from_mm(mm);
struct nvgpu_mem *inst_block = &mm->bar1.inst_block; struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; u32 big_page_size = g->ops.mm.get_default_big_page_size();
mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
@@ -2156,7 +2156,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
int err; int err;
struct gk20a *g = gk20a_from_mm(mm); struct gk20a *g = gk20a_from_mm(mm);
struct nvgpu_mem *inst_block = &mm->pmu.inst_block; struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; u32 big_page_size = g->ops.mm.get_default_big_page_size();
u32 low_hole, aperture_size; u32 low_hole, aperture_size;
/* /*
@@ -2207,7 +2207,7 @@ static int gk20a_init_hwpm(struct mm_gk20a *mm)
static int gk20a_init_cde_vm(struct mm_gk20a *mm) static int gk20a_init_cde_vm(struct mm_gk20a *mm)
{ {
struct gk20a *g = gk20a_from_mm(mm); struct gk20a *g = gk20a_from_mm(mm);
u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; u32 big_page_size = g->ops.mm.get_default_big_page_size();
mm->cde.vm = nvgpu_vm_init(g, big_page_size, mm->cde.vm = nvgpu_vm_init(g, big_page_size,
big_page_size << 10, big_page_size << 10,
@@ -2222,7 +2222,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
static int gk20a_init_ce_vm(struct mm_gk20a *mm) static int gk20a_init_ce_vm(struct mm_gk20a *mm)
{ {
struct gk20a *g = gk20a_from_mm(mm); struct gk20a *g = gk20a_from_mm(mm);
u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; u32 big_page_size = g->ops.mm.get_default_big_page_size();
mm->ce.vm = nvgpu_vm_init(g, big_page_size, mm->ce.vm = nvgpu_vm_init(g, big_page_size,
big_page_size << 10, big_page_size << 10,

View File

@@ -111,9 +111,6 @@ struct gk20a_platform {
*/ */
bool force_reset_in_do_idle; bool force_reset_in_do_idle;
/* Default big page size 64K or 128K */
u32 default_big_page_size;
/* default pri timeout, on PCIe it should be lower than timeout /* default pri timeout, on PCIe it should be lower than timeout
* detection * detection
*/ */

View File

@@ -52,7 +52,6 @@ struct gk20a_platform vgpu_tegra_platform = {
.ch_wdt_timeout_ms = 5000, .ch_wdt_timeout_ms = 5000,
.probe = gk20a_tegra_probe, .probe = gk20a_tegra_probe,
.default_big_page_size = SZ_128K,
.clk_round_rate = vgpu_clk_round_rate, .clk_round_rate = vgpu_clk_round_rate,
.get_clk_freqs = vgpu_clk_get_freqs, .get_clk_freqs = vgpu_clk_get_freqs,

View File

@@ -45,6 +45,11 @@ static u32 gm20b_mm_get_big_page_sizes(void)
return SZ_64K | SZ_128K; return SZ_64K | SZ_128K;
} }
/*
 * HAL op: report the default big page size for gm20b.
 *
 * Takes no arguments and always returns SZ_128K. gm20b_init_mm() installs
 * this as gops->mm.get_default_big_page_size.
 */
static u32 gm20b_mm_get_default_big_page_size(void)
{
	const u32 default_size = SZ_128K;

	return default_size;
}
static bool gm20b_mm_support_sparse(struct gk20a *g) static bool gm20b_mm_support_sparse(struct gk20a *g)
{ {
return true; return true;
@@ -67,6 +72,7 @@ void gm20b_init_mm(struct gpu_ops *gops)
gops->mm.cbc_clean = gk20a_mm_cbc_clean; gops->mm.cbc_clean = gk20a_mm_cbc_clean;
gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes; gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes;
gops->mm.get_default_big_page_size = gm20b_mm_get_default_big_page_size;
gops->mm.get_iova_addr = gk20a_mm_iova_addr; gops->mm.get_iova_addr = gk20a_mm_iova_addr;
gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels; gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels;

View File

@@ -26,6 +26,11 @@
#include <nvgpu/hw/gp10b/hw_bus_gp10b.h> #include <nvgpu/hw/gp10b/hw_bus_gp10b.h>
#include <nvgpu/hw/gp10b/hw_gmmu_gp10b.h> #include <nvgpu/hw/gp10b/hw_gmmu_gp10b.h>
/*
 * HAL op: report the default big page size for gp10b.
 *
 * Takes no arguments and always returns SZ_64K. gp10b_init_mm() installs
 * this as gops->mm.get_default_big_page_size after calling gm20b_init_mm(),
 * so it replaces the gm20b default.
 */
static u32 gp10b_mm_get_default_big_page_size(void)
{
	const u32 default_size = SZ_64K;

	return default_size;
}
static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g) static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
{ {
return 36; return 36;
@@ -68,7 +73,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g)
int err; int err;
struct mm_gk20a *mm = &g->mm; struct mm_gk20a *mm = &g->mm;
struct nvgpu_mem *inst_block = &mm->bar2.inst_block; struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; u32 big_page_size = g->ops.mm.get_default_big_page_size();
/* BAR2 aperture size is 32MB */ /* BAR2 aperture size is 32MB */
mm->bar2.aperture_size = 32 << 20; mm->bar2.aperture_size = 32 << 20;
@@ -410,6 +415,7 @@ static void gp10b_remove_bar2_vm(struct gk20a *g)
void gp10b_init_mm(struct gpu_ops *gops) void gp10b_init_mm(struct gpu_ops *gops)
{ {
gm20b_init_mm(gops); gm20b_init_mm(gops);
gops->mm.get_default_big_page_size = gp10b_mm_get_default_big_page_size;
gops->mm.get_physical_addr_bits = gp10b_mm_get_physical_addr_bits; gops->mm.get_physical_addr_bits = gp10b_mm_get_physical_addr_bits;
gops->mm.init_mm_setup_hw = gp10b_init_mm_setup_hw; gops->mm.init_mm_setup_hw = gp10b_init_mm_setup_hw;
gops->mm.init_bar2_vm = gb10b_init_bar2_vm; gops->mm.init_bar2_vm = gb10b_init_bar2_vm;