mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: add PRAMIN support for mem accessors
To support vidmem, implement a way to access buffers via the PRAMIN window, in addition to the kernel-mapped sysmem buffers that are the only access method used for iGPU as of now. Depending on the buffer aperture, the buffer memory accessor functions choose between the two access types. vmap()/vunmap() pairs are no-ops for buffers that can't be cpu-mapped. Two uses of DMA_ATTR_READ_ONLY are removed in the ucode loading path so that those buffers can also be written via the PRAMIN indirection, in addition to the cpu. JIRA DNVGPU-23 Change-Id: I282dba6741c6b8224bc12e69c1fb3936bde7e6ed Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: http://git-master/r/1141314 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
Terje Bergstrom
parent
dc7af18bf8
commit
3e431e26c5
@@ -2036,8 +2036,7 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
|
||||
g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32),
|
||||
g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));
|
||||
|
||||
err = gk20a_gmmu_alloc_attr(g, DMA_ATTR_READ_ONLY, ucode_size,
|
||||
&ucode_info->surface_desc);
|
||||
err = gk20a_gmmu_alloc(g, ucode_size, &ucode_info->surface_desc);
|
||||
if (err)
|
||||
goto clean_up;
|
||||
|
||||
|
||||
@@ -50,6 +50,30 @@
|
||||
#ifndef _hw_bus_gk20a_h_
|
||||
#define _hw_bus_gk20a_h_
|
||||
|
||||
/* Register offset of bus_bar0_window. */
static inline u32 bus_bar0_window_r(void)
{
	return 0x00001700U;
}
|
||||
/* Places 'v' into the 24-bit base field (bits 23:0) of bus_bar0_window. */
static inline u32 bus_bar0_window_base_f(u32 v)
{
	return v & 0x00ffffffU;
}
|
||||
/* bus_bar0_window target field value selecting vid_mem, already in place. */
static inline u32 bus_bar0_window_target_vid_mem_f(void)
{
	return 0U;
}
|
||||
/* bus_bar0_window target field value selecting sys_mem_coherent (2 << 24). */
static inline u32 bus_bar0_window_target_sys_mem_coherent_f(void)
{
	return 0x2U << 24;
}
|
||||
/* bus_bar0_window target field value selecting sys_mem_noncoherent (3 << 24). */
static inline u32 bus_bar0_window_target_sys_mem_noncoherent_f(void)
{
	return 0x3U << 24;
}
|
||||
/*
 * Number of bits an address is shifted right by to obtain the value for the
 * bus_bar0_window base field (16).
 */
static inline u32 bus_bar0_window_target_bar0_window_base_shift_v(void)
{
	return 16U;
}
|
||||
static inline u32 bus_bar1_block_r(void)
|
||||
{
|
||||
return 0x00001704;
|
||||
|
||||
57
drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h
Normal file
57
drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/*
|
||||
* Function naming determines intended use:
|
||||
*
|
||||
* <x>_r(void) : Returns the offset for register <x>.
|
||||
*
|
||||
* <x>_o(void) : Returns the offset for element <x>.
|
||||
*
|
||||
* <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
|
||||
*
|
||||
* <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
|
||||
*
|
||||
* <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
|
||||
* and masked to place it at field <y> of register <x>. This value
|
||||
* can be |'d with others to produce a full register value for
|
||||
* register <x>.
|
||||
*
|
||||
* <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
|
||||
* value can be ~'d and then &'d to clear the value of field <y> for
|
||||
* register <x>.
|
||||
*
|
||||
* <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
|
||||
* to place it at field <y> of register <x>. This value can be |'d
|
||||
* with others to produce a full register value for <x>.
|
||||
*
|
||||
* <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
|
||||
* <x> value 'r' after being shifted to place its LSB at bit 0.
|
||||
* This value is suitable for direct comparison with other unshifted
|
||||
* values appropriate for use in field <y> of register <x>.
|
||||
*
|
||||
* <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
|
||||
* field <y> of register <x>. This value is suitable for direct
|
||||
* comparison with unshifted values appropriate for use in field <y>
|
||||
* of register <x>.
|
||||
*/
|
||||
#ifndef _hw_pram_gk20a_h_
|
||||
#define _hw_pram_gk20a_h_
|
||||
|
||||
/* Register offset of the i-th 32-bit word of pram_data032 (base 0x00700000). */
static inline u32 pram_data032_r(u32 i)
{
	return 0x00700000U + (i << 2);
}
|
||||
#endif
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "hw_fb_gk20a.h"
|
||||
#include "hw_bus_gk20a.h"
|
||||
#include "hw_ram_gk20a.h"
|
||||
#include "hw_pram_gk20a.h"
|
||||
#include "hw_mc_gk20a.h"
|
||||
#include "hw_flush_gk20a.h"
|
||||
#include "hw_ltc_gk20a.h"
|
||||
@@ -44,10 +45,20 @@
|
||||
#include "kind_gk20a.h"
|
||||
#include "semaphore_gk20a.h"
|
||||
|
||||
/*
|
||||
* Flip this to force all gk20a_mem* accesses via PRAMIN from the start of the
|
||||
* boot, even for buffers that would work via cpu_va. At runtime, the flag is
|
||||
* in debugfs, called "force_pramin".
|
||||
*/
|
||||
#define GK20A_FORCE_PRAMIN_DEFAULT false
|
||||
|
||||
int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
|
||||
{
|
||||
void *cpu_va;
|
||||
|
||||
if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
|
||||
return 0;
|
||||
|
||||
if (WARN_ON(mem->cpu_va)) {
|
||||
gk20a_warn(dev_from_gk20a(g), "nested %s", __func__);
|
||||
return -EBUSY;
|
||||
@@ -66,20 +77,66 @@ int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
|
||||
|
||||
void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
|
||||
{
|
||||
if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
|
||||
return;
|
||||
|
||||
vunmap(mem->cpu_va);
|
||||
mem->cpu_va = NULL;
|
||||
}
|
||||
|
||||
/* WARNING: returns pramin_base_lock taken, complement with pramin_exit() */
|
||||
static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
|
||||
{
|
||||
u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
|
||||
u64 addr = bufbase + w * sizeof(u32);
|
||||
u32 hi = (u32)((addr & ~(u64)0xfffff)
|
||||
>> bus_bar0_window_target_bar0_window_base_shift_v());
|
||||
u32 lo = (addr & 0xfffff);
|
||||
|
||||
gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem);
|
||||
|
||||
WARN_ON(!bufbase);
|
||||
spin_lock(&g->mm.pramin_base_lock);
|
||||
if (g->mm.pramin_base != hi) {
|
||||
gk20a_writel(g, bus_bar0_window_r(),
|
||||
(g->mm.vidmem_is_vidmem
|
||||
&& mem->aperture == APERTURE_SYSMEM ?
|
||||
bus_bar0_window_target_sys_mem_noncoherent_f() :
|
||||
bus_bar0_window_target_vid_mem_f()) |
|
||||
bus_bar0_window_base_f(hi));
|
||||
gk20a_readl(g, bus_bar0_window_r());
|
||||
g->mm.pramin_base = hi;
|
||||
}
|
||||
|
||||
return lo;
|
||||
}
|
||||
|
||||
static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem)
|
||||
{
|
||||
gk20a_dbg(gpu_dbg_mem, "end for %p", mem);
|
||||
spin_unlock(&g->mm.pramin_base_lock);
|
||||
}
|
||||
|
||||
/*
 * Read the 32-bit word at word index @w of @mem.
 *
 * Sysmem buffers are read through mem->cpu_va unless force_pramin is set;
 * vidmem buffers, and every buffer when force_pramin is set, are read
 * through the PRAMIN window. Returns 0 (after a warning) when the aperture
 * is neither sysmem nor vidmem and force_pramin is clear.
 */
u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
{
	u32 data = 0;

	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
		data = ptr[w];
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		/* enter() returns a byte offset with the window lock held */
		u32 addr = gk20a_pramin_enter(g, mem, w);

		data = gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
		gk20a_pramin_exit(g, mem);
	} else {
		/* WARN_ON("...") would fire but never print the message */
		WARN(1, "Accessing unallocated mem_desc\n");
	}

	return data;
}
|
||||
|
||||
@@ -106,13 +163,23 @@ void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
|
||||
|
||||
/*
 * Write @data to the 32-bit word at word index @w of @mem.
 *
 * Sysmem buffers are written through mem->cpu_va unless force_pramin is
 * set; vidmem buffers, and every buffer when force_pramin is set, are
 * written through the PRAMIN window. Only warns when the aperture is
 * neither sysmem nor vidmem and force_pramin is clear.
 */
void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
{
	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
		ptr[w] = data;
	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
		/* enter() returns a byte offset with the window lock held */
		u32 addr = gk20a_pramin_enter(g, mem, w);

		gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data);
		/* read back to synchronize accesses */
		gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
		gk20a_pramin_exit(g, mem);
	} else {
		/* WARN_ON("...") would fire but never print the message */
		WARN(1, "Accessing unallocated mem_desc\n");
	}
}
|
||||
|
||||
void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data)
|
||||
@@ -535,6 +602,13 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g)
|
||||
return gk20a_gmmu_alloc(g, SZ_4K, &g->mm.sysmem_flush);
|
||||
}
|
||||
|
||||
static void gk20a_init_pramin(struct mm_gk20a *mm)
|
||||
{
|
||||
mm->pramin_base = 0;
|
||||
spin_lock_init(&mm->pramin_base_lock);
|
||||
mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
|
||||
}
|
||||
|
||||
int gk20a_init_mm_setup_sw(struct gk20a *g)
|
||||
{
|
||||
struct mm_gk20a *mm = &g->mm;
|
||||
@@ -558,6 +632,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
|
||||
(int)(mm->channel.user_size >> 20),
|
||||
(int)(mm->channel.kernel_size >> 20));
|
||||
|
||||
gk20a_init_pramin(mm);
|
||||
|
||||
err = gk20a_alloc_sysmem_flush(g);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -586,6 +662,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
|
||||
/* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
|
||||
g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
|
||||
mm->remove_support = gk20a_remove_mm_support;
|
||||
|
||||
mm->sw_ready = true;
|
||||
|
||||
gk20a_dbg_fn("done");
|
||||
@@ -690,6 +767,7 @@ static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
|
||||
entry->mem.cpu_va = page_address(pages);
|
||||
memset(entry->mem.cpu_va, 0, len);
|
||||
entry->mem.size = len;
|
||||
entry->mem.aperture = APERTURE_SYSMEM;
|
||||
FLUSH_CPU_DCACHE(entry->mem.cpu_va, sg_phys(entry->mem.sgt->sgl), len);
|
||||
|
||||
return 0;
|
||||
@@ -716,6 +794,7 @@ static void free_gmmu_phys_pages(struct vm_gk20a *vm,
|
||||
kfree(entry->mem.sgt);
|
||||
entry->mem.sgt = NULL;
|
||||
entry->mem.size = 0;
|
||||
entry->mem.aperture = APERTURE_INVALID;
|
||||
}
|
||||
|
||||
static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry)
|
||||
@@ -2164,6 +2243,7 @@ int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size, stru
|
||||
goto fail_free;
|
||||
|
||||
mem->size = size;
|
||||
mem->aperture = APERTURE_SYSMEM;
|
||||
|
||||
gk20a_dbg_fn("done");
|
||||
|
||||
@@ -2210,6 +2290,7 @@ void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr,
|
||||
gk20a_free_sgtable(&mem->sgt);
|
||||
|
||||
mem->size = 0;
|
||||
mem->aperture = APERTURE_INVALID;
|
||||
}
|
||||
|
||||
void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
|
||||
@@ -4015,6 +4096,9 @@ void gk20a_mm_debugfs_init(struct device *dev)
|
||||
|
||||
debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root,
|
||||
&g->separate_fixed_allocs);
|
||||
|
||||
debugfs_create_bool("force_pramin", 0664, gpu_root,
|
||||
&g->mm.force_pramin);
|
||||
}
|
||||
|
||||
void gk20a_init_mm(struct gpu_ops *gops)
|
||||
|
||||
@@ -40,10 +40,17 @@
|
||||
outer_flush_range(pa, pa + (size_t)(size)); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Aperture recorded in a mem_desc; the gk20a_mem_* accessors use it to pick
 * between cpu_va and PRAMIN access.
 */
enum gk20a_aperture {
	APERTURE_INVALID = 0,	/* e.g., unallocated */
	APERTURE_SYSMEM,
	APERTURE_VIDMEM,
};
|
||||
|
||||
struct mem_desc {
|
||||
void *cpu_va;
|
||||
struct page **pages;
|
||||
struct sg_table *sgt;
|
||||
enum gk20a_aperture aperture;
|
||||
size_t size;
|
||||
u64 gpu_va;
|
||||
};
|
||||
@@ -357,6 +364,14 @@ struct mm_gk20a {
|
||||
bool vidmem_is_vidmem;
|
||||
|
||||
struct mem_desc sysmem_flush;
|
||||
|
||||
u32 pramin_base;
|
||||
spinlock_t pramin_base_lock;
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
|
||||
u32 force_pramin; /* via debugfs */
|
||||
#else
|
||||
bool force_pramin; /* via debugfs */
|
||||
#endif
|
||||
};
|
||||
|
||||
int gk20a_mm_init(struct mm_gk20a *mm);
|
||||
|
||||
@@ -2443,8 +2443,7 @@ static int gk20a_prepare_ucode(struct gk20a *g)
|
||||
pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
|
||||
pmu->desc->descriptor_size);
|
||||
|
||||
err = gk20a_gmmu_alloc_map_attr(vm, DMA_ATTR_READ_ONLY,
|
||||
GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode);
|
||||
err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode);
|
||||
if (err)
|
||||
goto err_release_fw;
|
||||
|
||||
|
||||
@@ -50,6 +50,30 @@
|
||||
#ifndef _hw_bus_gm20b_h_
|
||||
#define _hw_bus_gm20b_h_
|
||||
|
||||
/* Register offset of bus_bar0_window. */
static inline u32 bus_bar0_window_r(void)
{
	return 0x00001700U;
}
|
||||
/* Places 'v' into the 24-bit base field (bits 23:0) of bus_bar0_window. */
static inline u32 bus_bar0_window_base_f(u32 v)
{
	return v & 0x00ffffffU;
}
|
||||
/* bus_bar0_window target field value selecting vid_mem, already in place. */
static inline u32 bus_bar0_window_target_vid_mem_f(void)
{
	return 0U;
}
|
||||
/* bus_bar0_window target field value selecting sys_mem_coherent (2 << 24). */
static inline u32 bus_bar0_window_target_sys_mem_coherent_f(void)
{
	return 0x2U << 24;
}
|
||||
/* bus_bar0_window target field value selecting sys_mem_noncoherent (3 << 24). */
static inline u32 bus_bar0_window_target_sys_mem_noncoherent_f(void)
{
	return 0x3U << 24;
}
|
||||
/*
 * Number of bits an address is shifted right by to obtain the value for the
 * bus_bar0_window base field (16).
 */
static inline u32 bus_bar0_window_target_bar0_window_base_shift_v(void)
{
	return 16U;
}
|
||||
static inline u32 bus_bar1_block_r(void)
|
||||
{
|
||||
return 0x00001704;
|
||||
|
||||
57
drivers/gpu/nvgpu/gm20b/hw_pram_gm20b.h
Normal file
57
drivers/gpu/nvgpu/gm20b/hw_pram_gm20b.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/*
|
||||
* Function naming determines intended use:
|
||||
*
|
||||
* <x>_r(void) : Returns the offset for register <x>.
|
||||
*
|
||||
* <x>_o(void) : Returns the offset for element <x>.
|
||||
*
|
||||
* <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
|
||||
*
|
||||
* <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
|
||||
*
|
||||
* <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
|
||||
* and masked to place it at field <y> of register <x>. This value
|
||||
* can be |'d with others to produce a full register value for
|
||||
* register <x>.
|
||||
*
|
||||
* <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
|
||||
* value can be ~'d and then &'d to clear the value of field <y> for
|
||||
* register <x>.
|
||||
*
|
||||
* <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
|
||||
* to place it at field <y> of register <x>. This value can be |'d
|
||||
* with others to produce a full register value for <x>.
|
||||
*
|
||||
* <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
|
||||
* <x> value 'r' after being shifted to place its LSB at bit 0.
|
||||
* This value is suitable for direct comparison with other unshifted
|
||||
* values appropriate for use in field <y> of register <x>.
|
||||
*
|
||||
* <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
|
||||
* field <y> of register <x>. This value is suitable for direct
|
||||
* comparison with unshifted values appropriate for use in field <y>
|
||||
* of register <x>.
|
||||
*/
|
||||
#ifndef _hw_pram_gm20b_h_
|
||||
#define _hw_pram_gm20b_h_
|
||||
|
||||
/* Register offset of the i-th 32-bit word of pram_data032 (base 0x00700000). */
static inline u32 pram_data032_r(u32 i)
{
	return 0x00700000U + (i << 2);
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user