diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c index da35abb26..cf0fd6a70 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c @@ -713,7 +713,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, "vm=%s " "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " - "kind=%#02x APT=%-6s %c%c%c%c", + "kind=%#02x APT=%-6s %c%c%c%c%c", vm->name, (sgt != NULL) ? "MAP" : "UNMAP", virt_addr, @@ -727,7 +727,8 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, attrs->cacheable ? 'C' : '-', attrs->sparse ? 'S' : '-', attrs->priv ? 'P' : '-', - attrs->valid ? 'V' : '-'); + attrs->valid ? 'V' : '-', + attrs->platform_atomic ? 'A' : '-'); err = __nvgpu_gmmu_do_update_page_table(vm, sgt, @@ -785,7 +786,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, .sparse = sparse, .priv = priv, .valid = (flags & NVGPU_VM_MAP_UNMAPPED_PTE) == 0U, - .aperture = aperture + .aperture = aperture, + .platform_atomic = (flags & NVGPU_VM_MAP_PLATFORM_ATOMIC) != 0U }; /* diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index f4528cc88..9c7498666 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c @@ -34,7 +34,7 @@ * will not add any checks. If you want to simply use the default coherency then * use nvgpu_aperture_mask(). 
*/ -u32 nvgpu_aperture_mask_coh(struct gk20a *g, enum nvgpu_aperture aperture, +u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture, u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) { @@ -64,7 +64,7 @@ u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, { enum nvgpu_aperture ap = mem->aperture; - return nvgpu_aperture_mask_coh(g, ap, + return nvgpu_aperture_mask_raw(g, ap, sysmem_mask, sysmem_coh_mask, vidmem_mask); diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_gv11b.c index 4c221cb3a..791e5816c 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_gv11b.c @@ -40,4 +40,5 @@ void vgpu_gv11b_init_gpu_characteristics(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_USERMODE_SUBMIT, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true); } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index d6ca478a7..440053826 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -229,7 +229,7 @@ static void __update_pte(struct vm_gk20a *vm, pte_w[0] |= gmmu_pte_privilege_true_f(); } - pte_w[1] = nvgpu_aperture_mask_coh(g, attrs->aperture, + pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture, gmmu_pte_aperture_sys_mem_ncoh_f(), gmmu_pte_aperture_sys_mem_coh_f(), gmmu_pte_aperture_video_memory_f()) | diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 02e29d61f..aaf73cc28 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -80,6 +80,32 @@ clean_up_va: return err; } +/* + * For GV11B and TU104 MSS NVLINK HW settings are in force_snoop mode. + * This will force all the GPU mappings to be coherent. 
+ * The mem aperture defaults to sysmem_non_coherent and will use L2 mode. + * Change the target pte aperture to sysmem_coherent if the mem attribute + * requests platform atomics, so that the rmw atomic capability is used. + * + */ +static u32 gmmu_aperture_mask(struct gk20a *g, + enum nvgpu_aperture mem_ap, + bool platform_atomic_attr, + u32 sysmem_mask, + u32 sysmem_coh_mask, + u32 vidmem_mask) +{ + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC) && + platform_atomic_attr) { + mem_ap = APERTURE_SYSMEM_COH; + } + + return nvgpu_aperture_mask_raw(g, mem_ap, + sysmem_mask, + sysmem_coh_mask, + vidmem_mask); +} + static void update_gmmu_pde3_locked(struct vm_gk20a *vm, const struct gk20a_mmu_level *l, struct nvgpu_gmmu_pd *pd, @@ -199,8 +225,9 @@ static void __update_pte(struct vm_gk20a *vm, u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ? gmmu_new_pte_address_sys_f(u64_lo32(phys_shifted)) : gmmu_new_pte_address_vid_f(u64_lo32(phys_shifted)); - u32 pte_tgt = nvgpu_aperture_mask_coh(g, + u32 pte_tgt = gmmu_aperture_mask(g, attrs->aperture, + attrs->platform_atomic, gmmu_new_pte_aperture_sys_mem_ncoh_f(), gmmu_new_pte_aperture_sys_mem_coh_f(), gmmu_new_pte_aperture_video_memory_f()); @@ -264,7 +291,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, "vm=%s " "PTE: i=%-4u size=%-2u | " "GPU %#-12llx phys %#-12llx " - "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c " + "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c " "ctag=0x%08x " "[0x%08x, 0x%08x]", vm->name, @@ -278,6 +305,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, attrs->sparse ? 'S' : '-', attrs->priv ? 'P' : '-', attrs->valid ? 'V' : '-', + attrs->platform_atomic ?
'A' : '-', (u32)attrs->ctag / g->ops.fb.compression_page_size(g), pte_w[1], pte_w[0]); diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 1130a7fb2..dcd91450f 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -1120,6 +1120,7 @@ int gv11b_init_hal(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true); g->name = "gv11b"; diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index 6dc21d675..d925ab202 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -180,10 +180,13 @@ struct gk20a; /* ZBC STENCIL support*/ #define NVGPU_SUPPORT_ZBC_STENCIL 71 +/* PLATFORM_ATOMIC support */ +#define NVGPU_SUPPORT_PLATFORM_ATOMIC 72 + /* * Must be greater than the largest bit offset in the above list. */ -#define NVGPU_MAX_ENABLED_BITS 72U +#define NVGPU_MAX_ENABLED_BITS 73U /** * nvgpu_is_enabled - Check if the passed flag is enabled. diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index 3ad0b80da..762d1d6e8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h @@ -57,17 +57,19 @@ enum gk20a_mem_rw_flag { * The following fields are set statically and do not change throughout the * mapping call: * - * pgsz: Index into the page size table. - * kind_v: Kind attributes for mapping. - * cacheable: Cacheability of the mapping. 
- * rw_flag: Flag from enum gk20a_mem_rw_flag - * sparse: Set if the mapping should be sparse. - * priv: Privilidged mapping. - * valid: Set if the PTE should be marked valid. - * aperture: VIDMEM or SYSMEM. - * debug: When set print debugging info. + * pgsz: Index into the page size table. + * kind_v: Kind attributes for mapping. + * cacheable: Cacheability of the mapping. + * rw_flag: Flag from enum gk20a_mem_rw_flag + * sparse: True if the mapping should be sparse. + * priv: Privileged mapping. + * valid: True if the PTE should be marked valid. + * aperture: VIDMEM or SYSMEM. + * debug: When set print debugging info. + * l3_alloc: True if l3_alloc flag is valid. + * platform_atomic: True if the mapping requests platform atomics. * - * These fields are dynamically updated as necessary during the map: + * These fields are dynamically updated as necessary during the map: * * ctag: Comptag line in the comptag cache; * updated every time we write a PTE. @@ -83,8 +85,8 @@ struct nvgpu_gmmu_attrs { bool valid; enum nvgpu_aperture aperture; bool debug; - bool l3_alloc; + bool platform_atomic; }; struct gk20a_mmu_level { diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 728a5b2f5..639d01967 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -281,7 +281,7 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem); u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); -u32 nvgpu_aperture_mask_coh(struct gk20a *g, enum nvgpu_aperture aperture, +u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture, u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask); u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask); diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index ce4d6b6b7..cd3a2fd37 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h @@ -216,6 +216,7 @@ struct vm_gk20a { #define NVGPU_VM_MAP_UNMAPPED_PTE BIT32(3) #define NVGPU_VM_MAP_DIRECT_KIND_CTRL BIT32(4) #define NVGPU_VM_MAP_L3_ALLOC BIT32(5) +#define NVGPU_VM_MAP_PLATFORM_ATOMIC BIT32(6) #define NVGPU_KIND_INVALID -1 diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c index febbb6fb0..c9eea5ffe 100644 --- a/drivers/gpu/nvgpu/os/linux/vm.c +++ b/drivers/gpu/nvgpu/os/linux/vm.c @@ -54,6 +54,8 @@ static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags) core_flags |= NVGPU_VM_MAP_L3_ALLOC; if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC) + core_flags |= NVGPU_VM_MAP_PLATFORM_ATOMIC; if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS) nvgpu_warn(g, "Ignoring deprecated flag: " diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 61844b9c9..d56b3f40c 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -1276,6 +1276,7 
@@ int tu104_init_hal(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_RTOS, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_RTOS_FBQ, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true); /* for now */ gops->clk.support_clk_freq_controller = false; diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index d6e830eb9..8cf821a1c 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -1927,6 +1927,7 @@ struct nvgpu_as_bind_channel_args { #define NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS (1 << 6) #define NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC (1 << 7) #define NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL (1 << 8) +#define NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC (1 << 9) /* * VM map buffer IOCTL @@ -1975,6 +1976,10 @@ struct nvgpu_as_bind_channel_args { * Set when userspace plans to pass in @compr_kind and @incompr_kind * instead of letting the kernel work out kind fields. * + * %NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC + * + * Specify that a mapping should use platform atomics. + * * @kind [IN] * * Specify the kind to use for the mapping.