gpu: nvgpu: add platform atomic support

Add a new flag in nvgpu_as_map_buffer_ex_args so that an application
can request platform atomic support for the mapped pages. When the
platform atomic attribute flag is set, the PTE memory aperture is set
to the coherent type.

Rename the nvgpu_aperture_mask_coh() function to
nvgpu_aperture_mask_raw().
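
For reference, the renamed helper keeps its old behavior: it selects one
of the three caller-supplied HW mask values purely from the aperture
type, without checking or applying the device's default coherency. A
condensed, illustrative sketch (not the verbatim implementation):

  /*
   * Sketch of nvgpu_aperture_mask_raw(): return the caller's mask for
   * the given aperture as-is. Callers that want the device's default
   * coherency applied should use nvgpu_aperture_mask() instead.
   */
  u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture,
  		u32 sysmem_mask, u32 sysmem_coh_mask,
  		u32 vidmem_mask)
  {
  	switch (aperture) {
  	case APERTURE_SYSMEM:
  		return sysmem_mask;
  	case APERTURE_SYSMEM_COH:
  		return sysmem_coh_mask;
  	case APERTURE_VIDMEM:
  		return vidmem_mask;
  	default:
  		return 0;
  	}
  }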

bug 200473147

Change-Id: I18266724dafdc8dfd96a0711f23cf08e23682afc
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2012679
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>


@@ -713,7 +713,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		"vm=%s "
 		"%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx "
 		"phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
-		"kind=%#02x APT=%-6s %c%c%c%c",
+		"kind=%#02x APT=%-6s %c%c%c%c%c",
 		vm->name,
 		(sgt != NULL) ? "MAP" : "UNMAP",
 		virt_addr,
@@ -727,7 +727,8 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		attrs->cacheable ? 'C' : '-',
 		attrs->sparse ? 'S' : '-',
 		attrs->priv ? 'P' : '-',
-		attrs->valid ? 'V' : '-');
+		attrs->valid ? 'V' : '-',
+		attrs->platform_atomic ? 'A' : '-');

 	err = __nvgpu_gmmu_do_update_page_table(vm,
 					sgt,
@@ -785,7 +786,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		.sparse = sparse,
 		.priv = priv,
 		.valid = (flags & NVGPU_VM_MAP_UNMAPPED_PTE) == 0U,
-		.aperture = aperture
+		.aperture = aperture,
+		.platform_atomic = (flags & NVGPU_VM_MAP_PLATFORM_ATOMIC) != 0U
 	};

 	/*


@@ -34,7 +34,7 @@
  * will not add any checks. If you want to simply use the default coherency then
  * use nvgpu_aperture_mask().
  */
-u32 nvgpu_aperture_mask_coh(struct gk20a *g, enum nvgpu_aperture aperture,
+u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture,
 		u32 sysmem_mask, u32 sysmem_coh_mask,
 		u32 vidmem_mask)
 {
@@ -64,7 +64,7 @@ u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
 {
 	enum nvgpu_aperture ap = mem->aperture;

-	return nvgpu_aperture_mask_coh(g, ap,
+	return nvgpu_aperture_mask_raw(g, ap,
 			sysmem_mask,
 			sysmem_coh_mask,
 			vidmem_mask);


@@ -40,4 +40,5 @@ void vgpu_gv11b_init_gpu_characteristics(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_USERMODE_SUBMIT, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
 }


@@ -229,7 +229,7 @@ static void __update_pte(struct vm_gk20a *vm,
 		pte_w[0] |= gmmu_pte_privilege_true_f();
 	}

-	pte_w[1] = nvgpu_aperture_mask_coh(g, attrs->aperture,
+	pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture,
 			gmmu_pte_aperture_sys_mem_ncoh_f(),
 			gmmu_pte_aperture_sys_mem_coh_f(),
 			gmmu_pte_aperture_video_memory_f()) |


@@ -80,6 +80,32 @@ clean_up_va:
 	return err;
 }

+/*
+ * For GV11B and TU104, the MSS NVLINK HW settings are in force_snoop
+ * mode, which forces all GPU mappings to be coherent. By default the
+ * mem aperture is set to sysmem_non_coherent and uses the L2 path.
+ * Change the target PTE aperture to sysmem_coherent if the mem
+ * attribute requests platform atomics, so that the RMW atomic
+ * capability can be used.
+ */
+static u32 gmmu_aperture_mask(struct gk20a *g,
+			enum nvgpu_aperture mem_ap,
+			bool platform_atomic_attr,
+			u32 sysmem_mask,
+			u32 sysmem_coh_mask,
+			u32 vidmem_mask)
+{
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC) &&
+	    platform_atomic_attr) {
+		mem_ap = APERTURE_SYSMEM_COH;
+	}
+
+	return nvgpu_aperture_mask_raw(g, mem_ap,
+			sysmem_mask,
+			sysmem_coh_mask,
+			vidmem_mask);
+}
+
 static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			const struct gk20a_mmu_level *l,
 			struct nvgpu_gmmu_pd *pd,
@@ -199,8 +225,9 @@ static void __update_pte(struct vm_gk20a *vm,
 	u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ?
 		gmmu_new_pte_address_sys_f(u64_lo32(phys_shifted)) :
 		gmmu_new_pte_address_vid_f(u64_lo32(phys_shifted));
-	u32 pte_tgt = nvgpu_aperture_mask_coh(g,
+	u32 pte_tgt = gmmu_aperture_mask(g,
 			attrs->aperture,
+			attrs->platform_atomic,
 			gmmu_new_pte_aperture_sys_mem_ncoh_f(),
 			gmmu_new_pte_aperture_sys_mem_coh_f(),
 			gmmu_new_pte_aperture_video_memory_f());
@@ -264,7 +291,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 		"vm=%s "
 		"PTE: i=%-4u size=%-2u | "
 		"GPU %#-12llx phys %#-12llx "
-		"pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c "
+		"pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c "
 		"ctag=0x%08x "
 		"[0x%08x, 0x%08x]",
 		vm->name,
@@ -278,6 +305,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 		attrs->cacheable ? 'C' : '-',
 		attrs->sparse ? 'S' : '-',
 		attrs->priv ? 'P' : '-',
 		attrs->valid ? 'V' : '-',
+		attrs->platform_atomic ? 'A' : '-',
 		(u32)attrs->ctag / g->ops.fb.compression_page_size(g),
 		pte_w[1], pte_w[0]);


@@ -1120,6 +1120,7 @@ int gv11b_init_hal(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);

 	g->name = "gv11b";


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -180,10 +180,13 @@ struct gk20a;
 /* ZBC STENCIL support*/
 #define NVGPU_SUPPORT_ZBC_STENCIL 71
+
+/* PLATFORM_ATOMIC support */
+#define NVGPU_SUPPORT_PLATFORM_ATOMIC 72

 /*
  * Must be greater than the largest bit offset in the above list.
  */
-#define NVGPU_MAX_ENABLED_BITS 72U
+#define NVGPU_MAX_ENABLED_BITS 73U

 /**
  * nvgpu_is_enabled - Check if the passed flag is enabled.


@@ -61,11 +61,13 @@ enum gk20a_mem_rw_flag {
  * kind_v: Kind attributes for mapping.
  * cacheable: Cacheability of the mapping.
  * rw_flag: Flag from enum gk20a_mem_rw_flag
- * sparse: Set if the mapping should be sparse.
+ * sparse: True if the mapping should be sparse.
  * priv: Privilidged mapping.
- * valid: Set if the PTE should be marked valid.
+ * valid: True if the PTE should be marked valid.
  * aperture: VIDMEM or SYSMEM.
  * debug: When set print debugging info.
+ * l3_alloc: True if l3_alloc flag is valid.
+ * platform_atomic: True if platform_atomic flag is valid.
  *
  * These fields are dynamically updated as necessary during the map:
@@ -83,8 +85,8 @@ struct nvgpu_gmmu_attrs {
 	bool valid;
 	enum nvgpu_aperture aperture;
 	bool debug;
 	bool l3_alloc;
+	bool platform_atomic;
 };

 struct gk20a_mmu_level {


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -281,7 +281,7 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
 u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
 u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem);

-u32 nvgpu_aperture_mask_coh(struct gk20a *g, enum nvgpu_aperture aperture,
+u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture,
 		u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
 u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
 		u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);


@@ -216,6 +216,7 @@ struct vm_gk20a {
 #define NVGPU_VM_MAP_UNMAPPED_PTE		BIT32(3)
 #define NVGPU_VM_MAP_DIRECT_KIND_CTRL		BIT32(4)
 #define NVGPU_VM_MAP_L3_ALLOC			BIT32(5)
+#define NVGPU_VM_MAP_PLATFORM_ATOMIC		BIT32(6)

 #define NVGPU_KIND_INVALID -1


@@ -54,6 +54,8 @@ static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags)
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC)
 		core_flags |= NVGPU_VM_MAP_L3_ALLOC;
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)
 		core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL;
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC)
+		core_flags |= NVGPU_VM_MAP_PLATFORM_ATOMIC;
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS)
 		nvgpu_warn(g, "Ignoring deprecated flag: "


@@ -1276,6 +1276,7 @@ int tu104_init_hal(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_RTOS, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_RTOS_FBQ, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);

 	/* for now */
 	gops->clk.support_clk_freq_controller = false;


@@ -1927,6 +1927,7 @@ struct nvgpu_as_bind_channel_args {
 #define NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS	(1 << 6)
 #define NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC		(1 << 7)
 #define NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL	(1 << 8)
+#define NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC	(1 << 9)

 /*
  * VM map buffer IOCTL
@@ -1975,6 +1976,10 @@ struct nvgpu_as_bind_channel_args {
  *     Set when userspace plans to pass in @compr_kind and @incompr_kind
  *     instead of letting the kernel work out kind fields.
  *
+ * %NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC
+ *
+ *     Specify that a mapping should use platform atomics.
+ *
  * @kind [IN]
  *
  *     Specify the kind to use for the mapping.
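
For illustration, a minimal userspace sketch of requesting a
platform-atomic mapping with the new flag. The ioctl name and the exact
nvgpu_as_map_buffer_ex_args field usage below are assumptions based on
the existing MAP_BUFFER_EX interface, not part of this change:

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/nvgpu.h>

  /*
   * Hypothetical usage sketch: map a dma-buf into the GPU address space
   * with platform atomics requested. NVGPU_AS_IOCTL_MAP_BUFFER_EX and
   * the dmabuf_fd/flags/offset fields are assumed from the existing
   * uapi, not introduced by this change.
   */
  static int map_with_platform_atomics(int as_fd, int dmabuf_fd)
  {
  	struct nvgpu_as_map_buffer_ex_args args;

  	memset(&args, 0, sizeof(args));
  	args.dmabuf_fd = dmabuf_fd;
  	args.flags = NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC;

  	/* args.offset reports the resulting GPU VA on success. */
  	return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_EX, &args);
  }

If NVGPU_SUPPORT_PLATFORM_ATOMIC is not enabled for the device, the
attribute is ignored and the mapping keeps the default (non-coherent)
sysmem aperture, as implemented in gmmu_aperture_mask() above.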