diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index afbad75c6..a5790e3aa 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -628,7 +628,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, "vm=%s " "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " - "kind=%#02x APT=%-6s %c%c%c%c%c", + "kind=%#02x APT=%-6s %c%c%c%c%c%c", vm->name, (sgt != NULL) ? "MAP" : "UNMAP", virt_addr, @@ -643,7 +643,8 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, attrs->sparse ? 'S' : '-', attrs->priv ? 'P' : '-', attrs->coherent ? 'I' : '-', - attrs->valid ? 'V' : '-'); + attrs->valid ? 'V' : '-', + attrs->platform_atomic ? 'A' : '-'); err = __nvgpu_gmmu_do_update_page_table(vm, sgt, @@ -702,7 +703,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, .priv = priv, .coherent = flags & NVGPU_VM_MAP_IO_COHERENT, .valid = (flags & NVGPU_VM_MAP_UNMAPPED_PTE) == 0U, - .aperture = aperture + .aperture = aperture, + .platform_atomic = (flags & NVGPU_VM_MAP_PLATFORM_ATOMIC) != 0U }; /* diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 8f6bd66e5..2326e0ae2 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,7 +31,7 @@ * will not add any checks. If you want to simply use the default coherency then * use nvgpu_aperture_mask(). */ -u32 nvgpu_aperture_mask_coh(struct gk20a *g, enum nvgpu_aperture aperture, +u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture, u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) { @@ -71,7 +71,7 @@ u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, ap = APERTURE_SYSMEM_COH; } - return nvgpu_aperture_mask_coh(g, ap, + return nvgpu_aperture_mask_raw(g, ap, sysmem_mask, sysmem_coh_mask, vidmem_mask); diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 644531f11..18922d46c 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -222,7 +222,7 @@ static void __update_pte(struct vm_gk20a *vm, pte_w[0] |= gmmu_pte_privilege_true_f(); } - pte_w[1] = nvgpu_aperture_mask_coh(g, attrs->aperture, + pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture, gmmu_pte_aperture_sys_mem_ncoh_f(), gmmu_pte_aperture_sys_mem_coh_f(), gmmu_pte_aperture_video_memory_f()) | diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 2c0056e1f..a4b291d89 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -1,7 +1,7 @@ /* * GP10B MMU * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -78,6 +78,32 @@ clean_up_va: return err; } +/* + * For GV11B and TU104, the MSS NVLINK HW settings are in force_snoop mode. + * This forces all the GPU mappings to be coherent. + * By default the mem aperture is set to sysmem_non_coherent and uses L2 mode. + * Change the target PTE aperture to sysmem_coherent if the mem attribute + * requests platform atomics, so that the RMW atomic capability is used. + * + */ +static u32 gmmu_aperture_mask(struct gk20a *g, + enum nvgpu_aperture mem_ap, + bool platform_atomic_attr, + u32 sysmem_mask, + u32 sysmem_coh_mask, + u32 vidmem_mask) +{ + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC) && + platform_atomic_attr) { + mem_ap = APERTURE_SYSMEM_COH; + } + + return nvgpu_aperture_mask_raw(g, mem_ap, + sysmem_mask, + sysmem_coh_mask, + vidmem_mask); +} + static void update_gmmu_pde3_locked(struct vm_gk20a *vm, const struct gk20a_mmu_level *l, struct nvgpu_gmmu_pd *pd, @@ -191,8 +217,9 @@ static void __update_pte(struct vm_gk20a *vm, u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ? gmmu_new_pte_address_sys_f(phys_shifted) : gmmu_new_pte_address_vid_f(phys_shifted); - u32 pte_tgt = nvgpu_aperture_mask_coh(g, + u32 pte_tgt = gmmu_aperture_mask(g, attrs->aperture, + attrs->platform_atomic, gmmu_new_pte_aperture_sys_mem_ncoh_f(), gmmu_new_pte_aperture_sys_mem_coh_f(), gmmu_new_pte_aperture_video_memory_f()); @@ -253,7 +280,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, "vm=%s " "PTE: i=%-4u size=%-2u | " "GPU %#-12llx phys %#-12llx " - "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c " + "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c%c " "ctag=0x%08x " "[0x%08x, 0x%08x]", vm->name, @@ -268,6 +295,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, attrs->priv ? 'P' : '-', attrs->coherent ? 'I' : '-', attrs->valid ? 'V' : '-', + attrs->platform_atomic ? 
'A' : '-', (u32)attrs->ctag / g->ops.fb.compression_page_size(g), pte_w[1], pte_w[0]); diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 4f2981331..68ea78a6f 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -1,7 +1,7 @@ /* * GV11B Tegra HAL interface * - * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -957,6 +957,7 @@ int gv11b_init_hal(struct gk20a *g) __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL, true); __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); + __nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true); g->name = "gv11b"; diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index 76f0f2bdb..fc7bab903 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -178,10 +178,13 @@ struct gk20a; /* NVGPU_GPU_IOCTL_GET_GPU_LOAD is available */ #define NVGPU_SUPPORT_GET_GPU_LOAD 70 +/* PLATFORM_ATOMIC support */ +#define NVGPU_SUPPORT_PLATFORM_ATOMIC 71 + /* * Must be greater than the largest bit offset in the above list. */ -#define NVGPU_MAX_ENABLED_BITS 71 +#define NVGPU_MAX_ENABLED_BITS 72 /** * nvgpu_is_enabled - Check if the passed flag is enabled. 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index a70015f8e..66cb52185 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -155,6 +155,7 @@ struct nvgpu_gmmu_pd { * valid: Set if the PTE should be marked valid. * aperture: VIDMEM or SYSMEM. * debug: When set print debugging info. + * platform_atomic: True if the map flags request platform atomics. * * These fields are dynamically updated as necessary during the map: * @@ -173,8 +174,8 @@ struct nvgpu_gmmu_attrs { bool valid; enum nvgpu_aperture aperture; bool debug; - bool l3_alloc; + bool platform_atomic; }; struct gk20a_mmu_level { diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 8e62a1d64..4e84f2af8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -349,7 +349,7 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem); u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); -u32 nvgpu_aperture_mask_coh(struct gk20a *g, enum nvgpu_aperture aperture, +u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture, u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask); u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask); diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index 667497174..3867c7456 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -213,6 +213,7 @@ struct vm_gk20a { #define NVGPU_VM_MAP_UNMAPPED_PTE BIT32(3) #define NVGPU_VM_MAP_DIRECT_KIND_CTRL BIT32(4) #define NVGPU_VM_MAP_L3_ALLOC BIT32(5) +#define NVGPU_VM_MAP_PLATFORM_ATOMIC BIT32(6) #define NVGPU_KIND_INVALID -1 diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c index 27a91cf61..dc807ab6d 100644 --- a/drivers/gpu/nvgpu/os/linux/vm.c +++ b/drivers/gpu/nvgpu/os/linux/vm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -54,6 +54,8 @@ static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags) core_flags |= NVGPU_VM_MAP_L3_ALLOC; if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC) + core_flags |= NVGPU_VM_MAP_PLATFORM_ATOMIC; if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS) nvgpu_warn(g, "Ignoring deprecated flag: " diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_gv11b.c index 1bcd151ae..baddae133 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_gv11b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -43,6 +43,7 @@ int vgpu_gv11b_init_gpu_characteristics(struct gk20a *g) __nvgpu_set_enabled(g, NVGPU_SUPPORT_SCG, true); __nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS, true); __nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true); + __nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true); return 0; } diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 0488e563e..9197011bb 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -1,7 +1,7 @@ /* * NVGPU Public Interface Header * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1894,6 +1894,7 @@ struct nvgpu_as_bind_channel_args { #define NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS (1 << 6) #define NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC (1 << 7) #define NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL (1 << 8) +#define NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC (1 << 9) /* * VM map buffer IOCTL @@ -1940,6 +1941,10 @@ struct nvgpu_as_bind_channel_args { * Set when userspace plans to pass in @compr_kind and @incompr_kind * instead of letting the kernel work out kind fields. * + * %NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC + * + * Specify that a mapping should use platform atomics. + * * @kind [IN] * * Specify the kind to use for the mapping.