Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu:nvgpu: add support for unmapped ptes
Add support for unmapped ptes during gmmu map.

Bug 1587825

Change-Id: I6e42ef58bae70ce29e5b82852f77057855ca9971
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/696507
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Committed by: Dan Willemsen
Parent: a51abd7bb0
Commit: cf0085ec23
@@ -98,7 +98,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   struct sg_table *sgt, u64 buffer_offset,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset, bool cacheable,
-				   int rw_flag,
+				   bool umapped_pte, int rw_flag,
 				   bool sparse);
 static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
@@ -1115,6 +1115,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 				ctag_offset,
 				flags &
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				flags &
+				NVGPU_GPU_FLAGS_SUPPORT_UNMAPPED_PTE,
 				rw_flag,
 				sparse);
 	if (err) {
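The new pair of argument lines feeds the flag test straight into the bool unmapped_pte parameter added above, so any non-zero mask result reaches the callee as true. A tiny self-contained sketch of that conversion; only the flag value is taken from the uapi hunk below, the helper and printout are illustrative rather than nvgpu code:

#include <stdbool.h>
#include <stdio.h>

#define NVGPU_GPU_FLAGS_SUPPORT_UNMAPPED_PTE (1 << 5)

/* Stand-in for the callee: it receives the masked flag as a plain bool. */
static void take_unmapped_pte(bool unmapped_pte)
{
	printf("unmapped_pte = %s\n", unmapped_pte ? "true" : "false");
}

int main(void)
{
	unsigned int flags = NVGPU_GPU_FLAGS_SUPPORT_UNMAPPED_PTE | 0x1;

	/* Mirrors the call site: the bitwise AND result is converted to
	 * true/false when bound to the bool parameter. */
	take_unmapped_pte(flags & NVGPU_GPU_FLAGS_SUPPORT_UNMAPPED_PTE);
	take_unmapped_pte(0 & NVGPU_GPU_FLAGS_SUPPORT_UNMAPPED_PTE);
	return 0;
}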
@@ -1161,7 +1163,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 				vaddr,
 				vaddr + size,
 				0, 0, false /* n/a for unmap */,
-				rw_flag,
+				false, rw_flag,
 				sparse);
 	if (err)
 		dev_err(dev_from_vm(vm),
@@ -1729,7 +1731,8 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 			       u32 i, u32 gmmu_pgsz_idx,
 			       u64 iova,
 			       u32 kind_v, u32 *ctag,
-			       bool cacheable, int rw_flag, bool sparse)
+			       bool cacheable, bool unammped_pte,
+			       int rw_flag, bool sparse)
 {
 	bool small_valid, big_valid;
 	u64 pte_addr_small = 0, pte_addr_big = 0;
@@ -1775,7 +1778,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			       u32 i, u32 gmmu_pgsz_idx,
 			       u64 iova,
 			       u32 kind_v, u32 *ctag,
-			       bool cacheable, int rw_flag, bool sparse)
+			       bool cacheable, bool unmapped_pte,
+			       int rw_flag, bool sparse)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u32 ctag_granularity = g->ops.fb.compression_page_size(g);
@@ -1783,9 +1787,15 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
 
 	if (iova) {
+		if (unmapped_pte)
+			pte_w[0] = gmmu_pte_valid_false_f() |
+				gmmu_pte_address_sys_f(iova
+				>> gmmu_pte_address_shift_v());
+		else
 		pte_w[0] = gmmu_pte_valid_true_f() |
 			gmmu_pte_address_sys_f(iova
 			>> gmmu_pte_address_shift_v());
+
 		pte_w[1] = gmmu_pte_aperture_video_memory_f() |
 			gmmu_pte_kind_f(kind_v) |
 			gmmu_pte_comptagline_f(*ctag / ctag_granularity);
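The effect of the new branch is that an unmapped PTE still carries the physical address, only with the valid bit left clear, so a later re-map can turn it into a live mapping without recomputing the address. A simplified sketch of that encoding; the bit positions are made up for illustration and are not the generated gmmu_pte_* values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative field layout only; the real values come from hw_gmmu headers. */
#define PTE_VALID            (1u << 0)
#define PTE_ADDR_SHIFT       12          /* stand-in for gmmu_pte_address_shift_v() */
#define PTE_ADDR_FIELD(iova) ((uint32_t)((iova) >> PTE_ADDR_SHIFT) << 4)

static uint32_t encode_pte_word0(uint64_t iova, bool unmapped_pte)
{
	/* The address bits are programmed either way; only the valid bit
	 * differs, mirroring the if (unmapped_pte) / else branch above. */
	uint32_t w = PTE_ADDR_FIELD(iova);

	if (!unmapped_pte)
		w |= PTE_VALID;
	return w;
}

int main(void)
{
	printf("mapped:   0x%08x\n", encode_pte_word0(0x12345000ULL, false));
	printf("unmapped: 0x%08x\n", encode_pte_word0(0x12345000ULL, true));
	return 0;
}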
@@ -1799,8 +1809,18 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			pte_w[1] |=
 				gmmu_pte_read_disable_true_f();
 		}
-		if (!cacheable)
-			pte_w[1] |= gmmu_pte_vol_true_f();
+		if (!unmapped_pte) {
+			if (!cacheable)
+				pte_w[1] |=
+					gmmu_pte_vol_true_f();
+			else {
+				/* Store cachable value behind
+				 * gmmu_pte_write_disable_true_f */
+				if (!cacheable)
+					pte_w[1] |=
+						gmmu_pte_write_disable_true_f();
+			}
+		}
 
 		gk20a_dbg(gpu_dbg_pte,
 			"pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]",
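Going by the in-line comment, the intent is that an unmapped PTE cannot use the volatile bit to express cacheability (it only has meaning for a valid mapping), so the non-cacheable attribute is parked behind the write-disable field where a later re-map can recover it. A minimal sketch of that idea with hypothetical masks; note the committed hunk attaches the else to the inner if (!cacheable), whereas this sketch keys the two cases directly off unmapped_pte:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical masks, not the real gmmu_pte_* field values. */
#define PTE_VOL            (1u << 1)  /* volatile: valid, non-cacheable mapping */
#define PTE_WRITE_DISABLE  (1u << 2)  /* reused to stash cacheability when unmapped */

uint32_t encode_cacheability(uint32_t w, bool cacheable, bool unmapped_pte)
{
	if (!unmapped_pte) {
		if (!cacheable)
			w |= PTE_VOL;
	} else {
		/* Store the cacheable attribute behind write-disable so it
		 * survives until the PTE is made valid. */
		if (!cacheable)
			w |= PTE_WRITE_DISABLE;
	}
	return w;
}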
@@ -1829,7 +1849,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 				   u64 iova,
 				   u64 gpu_va, u64 gpu_end,
 				   u8 kind_v, u32 *ctag,
-				   bool cacheable,
+				   bool cacheable, bool unmapped_pte,
 				   int rw_flag,
 				   bool sparse,
 				   int lvl)
@@ -1877,7 +1897,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		}
 
 		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
-				iova, kind_v, ctag, cacheable,
+				iova, kind_v, ctag, cacheable, unmapped_pte,
 				rw_flag, sparse);
 		if (err)
 			return err;
@@ -1896,8 +1916,8 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 				iova,
 				gpu_va,
 				next,
-				kind_v, ctag,
-				cacheable, rw_flag, sparse, lvl+1);
+				kind_v, ctag, cacheable, unmapped_pte,
+				rw_flag, sparse, lvl+1);
 			unmap_gmmu_pages(next_pte);
 
 			if (err)
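Because the argument list is reflowed across the - and + lines, the recursive descent call is easier to read pieced back together. With the leading arguments reconstructed from nearby context lines (so treat them as an approximation rather than the exact source), it now threads the flag down to the next page-table level roughly as:

			/* Reconstructed; vm, next_pte and pgsz_idx are taken
			 * from surrounding context, not from this hunk. */
			err = update_gmmu_level_locked(vm, next_pte, pgsz_idx,
					iova,
					gpu_va,
					next,
					kind_v, ctag, cacheable, unmapped_pte,
					rw_flag, sparse, lvl+1);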
@@ -1921,7 +1941,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   u64 buffer_offset,
 				   u64 gpu_va, u64 gpu_end,
 				   u8 kind_v, u32 ctag_offset,
-				   bool cacheable,
+				   bool cacheable, bool unmapped_pte,
 				   int rw_flag,
 				   bool sparse)
 {
@@ -1956,7 +1976,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			iova,
 			gpu_va, gpu_end,
 			kind_v, &ctag,
-			cacheable, rw_flag, sparse, 0);
+			cacheable, unmapped_pte, rw_flag, sparse, 0);
 	unmap_gmmu_pages(&vm->pdb);
 
 	smp_mb();
@@ -276,7 +276,8 @@ struct gk20a_mmu_level {
 		       u32 i, u32 gmmu_pgsz_idx,
 		       u64 iova,
 		       u32 kind_v, u32 *ctag,
-		       bool cacheable, int rw_flag, bool sparse);
+		       bool cacheable, bool unmapped_pte,
+		       int rw_flag, bool sparse);
 	size_t entry_size;
 };
 
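The member being edited here is the level's update_entry callback, the same hook invoked as l->update_entry() in the hunks above. With the leading parameters filled in from that call site, and therefore reconstructed rather than quoted, the declaration after this change would read roughly:

	int (*update_entry)(struct vm_gk20a *vm,
			    struct gk20a_mm_entry *pte,   /* type assumed */
			    u32 i, u32 gmmu_pgsz_idx,
			    u64 iova,
			    u32 kind_v, u32 *ctag,
			    bool cacheable, bool unmapped_pte,
			    int rw_flag, bool sparse);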
@@ -104,6 +104,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS (1 << 3)
 /* NVGPU_IOCTL_CHANNEL_CYCLE_STATS is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS (1 << 4)
+/* MAP_BUFFER_EX with unmapped PTE */
+#define NVGPU_GPU_FLAGS_SUPPORT_UNMAPPED_PTE (1 << 5)
 
 struct nvgpu_gpu_characteristics {
 	__u32 arch;
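The new bit is reported through the GPU characteristics flags, so userspace can probe for unmapped-PTE support before requesting such a mapping. A hedged sketch of that check; the assumption that nvgpu_gpu_characteristics carries a 64-bit flags word filled in by the driver's characteristics query comes from the existing uapi, not from this diff:

#include <stdbool.h>
#include <stdint.h>

/* Value introduced by this commit. */
#define NVGPU_GPU_FLAGS_SUPPORT_UNMAPPED_PTE (1 << 5)

/* characteristics_flags is assumed to be the flags word returned by the
 * GPU characteristics ioctl. */
bool supports_unmapped_pte(uint64_t characteristics_flags)
{
	return (characteristics_flags & NVGPU_GPU_FLAGS_SUPPORT_UNMAPPED_PTE) != 0;
}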