From 73f07366c3f34885ecaf4f33a4a094aa1c046c8f Mon Sep 17 00:00:00 2001
From: dt
Date: Wed, 20 Jan 2021 11:25:37 +0000
Subject: [PATCH] gpu: nvgpu: Add doxygen update for common mm unit

Add doxygen comments for the common mm unit, including return
values, descriptions and some format changes.

JIRA NVGPU-6381

Change-Id: Ibbe1af5b9e4356bf02bb591116e08735ce77b323
Signed-off-by: dt
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2472907
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-misra
Reviewed-by: svc-mobile-cert
Reviewed-by: Lakshmanan M
Reviewed-by: Vaibhav Kachore
Reviewed-by: mobile promotions
Tested-by: mobile promotions
GVS: Gerrit_Virtual_Submit
---
 .../common/mm/allocators/bitmap_allocator.c |  10 +-
 .../common/mm/allocators/buddy_allocator.c  |  46 +++-
 drivers/gpu/nvgpu/include/nvgpu/allocator.h | 199 ++++++++++++--
 drivers/gpu/nvgpu/include/nvgpu/as.h        |  39 ++-
 drivers/gpu/nvgpu/include/nvgpu/dma.h       |  98 ++++---
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h      | 255 ++++++++++--------
 drivers/gpu/nvgpu/include/nvgpu/pd_cache.h  |  85 +++---
 7 files changed, 514 insertions(+), 218 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c
index a333bcf68..25976a48f 100644
--- a/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -206,6 +206,14 @@ static int nvgpu_bitmap_store_alloc(struct nvgpu_bitmap_allocator *a,
 /*
  * @len is in bytes. This routine will figure out the right number of bits to
  * actually allocate. The return is the address in bytes as well.
+ *
+ * This is a find-first-fit allocator.
+ * Check the input parameter validity.
+ * Acquire the alloc_lock.
+ * Search the bitmap for the first space that is large enough to satisfy the
+ * requested number of bits by walking the next available free blocks with
+ * bitmap_find_next_zero_area().
+ * Release the alloc_lock.
  */
 static u64 nvgpu_bitmap_balloc(struct nvgpu_allocator *na, u64 len)
 {
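The find-first-fit walk documented in the hunk above can be pictured with a small, self-contained sketch. This is an editorial illustration under assumed names, not the driver's code (the real routine uses the kernel's bitmap_find_next_zero_area() and the allocator's alloc_lock, which are elided here):

#include <stdint.h>

#define MAP_BITS 1024U

/* Return the index of the first run of @nr consecutive clear bits
 * (nr >= 1), or MAP_BITS if no run is large enough. This mirrors the
 * first-fit search nvgpu_bitmap_balloc() performs, minus locking and
 * bookkeeping. */
static unsigned int find_first_fit(const uint64_t *map, unsigned int nr)
{
	unsigned int start = 0U, run = 0U, bit;

	for (bit = 0U; bit < MAP_BITS; bit++) {
		int set = (int)((map[bit / 64U] >> (bit % 64U)) & 1U);

		if (set) {
			run = 0U;          /* run broken by an allocated bit */
			start = bit + 1U;  /* candidate start moves forward */
		} else if (++run == nr) {
			return start;      /* first fit found */
		}
	}
	return MAP_BITS; /* no space */
}

The byte address the real allocator hands back is then base + start * blk_size, and the bits [start, start + nr) are marked busy under the lock.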
diff --git a/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c
index e0233af70..636dc46c1 100644
--- a/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -287,6 +287,10 @@ cleanup:
 /*
  * Clean up and destroy the passed allocator.
+ * Walk the allocator for any pending allocations.
+ * Free up all pending allocations.
+ * Free any memory allocated at allocator init time.
+ * Destroy the lock and bzero the allocator completely.
  */
 static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *na)
 {
@@ -903,6 +907,14 @@ static void balloc_do_free_fixed(struct nvgpu_buddy_allocator *a,
 /*
  * Allocate memory from the passed allocator.
+ * Acquire the allocator lock.
+ * Compute the order by calling balloc_get_order().
+ * Compute the PTE size supported for this allocation by calling
+ * nvgpu_balloc_page_size_to_pte_size().
+ * If a buddy of the required size cannot be satisfied, call
+ * balloc_split_buddy() to get the required size by dividing a larger buddy.
+ * Free the remaining buddy to the respective list.
+ * Release the alloc_lock.
  */
 static u64 nvgpu_buddy_balloc_pte(struct nvgpu_allocator *na, u64 len,
				  u32 page_size)
@@ -959,7 +971,15 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len)
 }
 
 /*
- * Requires @na to be locked.
+ * Check the input parameter validity.
+ * Acquire the alloc_lock.
+ * Compute the order with respect to the input size.
+ * Compute the pte_size for the given page size and return an error for an
+ * invalid pte size.
+ * Call balloc_is_range_free() to check that the range at the given address
+ * is free.
+ * Call balloc_make_fixed_buddy() to generate the list of buddies.
+ * Do the bookkeeping of allocated objects in the respective lists.
+ * Release the alloc_lock.
  */
 static u64 nvgpu_balloc_fixed_buddy_locked(struct nvgpu_allocator *na,
					   u64 base, u64 len, u32 page_size)
@@ -1144,6 +1164,12 @@ static bool nvgpu_buddy_reserve_is_possible(struct nvgpu_buddy_allocator *a,
 /*
  * Carveouts can only be reserved before any regular allocations have been
  * made.
+ * - Check the validity of input parameters.
+ * - Acquire the allocator lock.
+ * - Call nvgpu_balloc_fixed_buddy_locked() to reserve the object
+ *   with \a co.base and \a co.length.
+ * - Add the allocated object to the bookkeeping list.
+ * - Release the allocator lock.
  */
 static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *na,
				  struct nvgpu_alloc_carveout *co)
@@ -1189,6 +1215,15 @@ done:
 /*
  * Carveouts can be release at any time.
+ * - Acquire the allocator lock.
+ * - Remove the carveout from the allocator list.
+ * - Call nvgpu_buddy_bfree_locked() to free the carveout.
+ *   - nvgpu_buddy_bfree_locked() first checks whether the address is a
+ *     fixed allocation by calling balloc_free_fixed().
+ *   - If the address is fixed, free it by calling balloc_do_free_fixed().
+ *   - Else free it through balloc_free_buddy().
+ * - Recompute the size of the allocator and coalesce the objects.
+ * - Release the lock.
  */
 static void nvgpu_buddy_release_co(struct nvgpu_allocator *na,
				   struct nvgpu_alloc_carveout *co)
@@ -1230,7 +1265,12 @@ static u64 nvgpu_buddy_alloc_end(struct nvgpu_allocator *a)
 	return ba->end;
 }
 
-
+/*
+ * - Acquire the allocator lock.
+ * - Check the availability of space between the start and end of
+ *   the allocator.
+ * - Release the allocator lock.
+ */
 static u64 nvgpu_buddy_alloc_space(struct nvgpu_allocator *a)
 {
 	struct nvgpu_buddy_allocator *ba = buddy_allocator(a);
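The split path described in the balloc comments above rests on standard buddy-system arithmetic: a block's buddy differs from it only in the bit corresponding to its order, so splits and merges are XOR operations. An editorial sketch of that math (illustrative names and block size, not the allocator's internals):

#include <stdint.h>

/* Smallest allocatable block; 4 KiB is a placeholder. */
#define BLK_SIZE 4096ULL

/* Order needed to hold @len bytes: smallest n with BLK_SIZE << n >= len. */
static unsigned int balloc_order_sketch(uint64_t len)
{
	unsigned int order = 0U;
	uint64_t size = BLK_SIZE;

	while (size < len) {
		size <<= 1;
		order++;
	}
	return order;
}

/* Address of the buddy of the block at @base with the given @order. */
static uint64_t buddy_addr_sketch(uint64_t base, unsigned int order)
{
	return base ^ (BLK_SIZE << order);
}

Splitting a buddy of order n yields the block at base and its buddy at buddy_addr_sketch(base, n - 1); freeing walks the same relation in reverse to coalesce.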
diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
index 26ed43103..6b8a11c11 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -399,10 +399,33 @@ static inline void alloc_unlock(struct nvgpu_allocator *a)
  * @param[in] blk_size	Block size of buddy allocator.
  * @param[in] max_order	Maximum allowed buddy order.
  * @param[in] flags	Flags indicating buddy allocator conditions.
+ *			Valid flags are
+ *			- GPU_ALLOC_GVA_SPACE
+ *			- GPU_ALLOC_NO_ALLOC_PAGE
+ *			- GPU_ALLOC_4K_VIDMEM_PAGES
+ *			- GPU_ALLOC_FORCE_CONTIG
+ *			- GPU_ALLOC_NO_SCATTER_GATHER
  *
- * @return 0 in case of success, < 0 otherwise.
- * @retval -EINVAL in case of incorrect input value.
- * @retval -ENOMEM in case there is not enough memory for allocation.
+ * Construct a buddy allocator in \a na. A buddy allocator manages memory by
+ * splitting all memory into "buddies" - pairs of adjacent blocks of memory.
+ * Each buddy can be further subdivided into buddies, again, allowing for
+ * arbitrary power-of-two sized blocks to be allocated.
+ *
+ * Call nvgpu_buddy_check_argument_limits() to check the validity of the
+ * inputs. This function verifies that the input arguments are valid for a
+ * buddy allocator. Specifically, the #blk_size of a buddy allocator must be
+ * a power-of-two, #max_order must be less than #GPU_BALLOC_MAX_ORDER,
+ * and the size must be non-zero.
+ * Call nvgpu_alloc_common_init() to initialize the basic operations like
+ * the allocation, free and query operations for the respective allocator.
+ * Initialize some lists and locks to maintain the allocator objects.
+ *
+ * @return 0 in case of success, < 0 otherwise.
+ * @retval -EINVAL in case of incorrect input value:
+ *         - \a length is zero.
+ *         - \a blk_size is not a power of two.
+ *         - \a base or \a length is not aligned with \a blk_size.
+ * @retval -ENOMEM in case there is not enough memory for allocation.
  */
 int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
			       struct vm_gk20a *vm, const char *name,
@@ -419,10 +442,26 @@ int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
  * @param[in] length	Size of bitmap allocator.
  * @param[in] blk_size	Block size of bitmap allocator.
  * @param[in] flags	Flags indicating bitmap allocator conditions.
+ *			Valid flags are
+ *			- GPU_ALLOC_GVA_SPACE
+ *			- GPU_ALLOC_NO_ALLOC_PAGE
+ *			- GPU_ALLOC_FORCE_CONTIG
+ *			- GPU_ALLOC_NO_SCATTER_GATHER
  *
- * @return 0 in case of success, < 0 otherwise.
- * @retval -EINVAL in case of incorrect input value.
- * @retval -ENOMEM in case there is not enough memory for allocation.
+ * Call nvgpu_bitmap_check_argument_limits() to check the validity of the
+ * input parameters. This function verifies that the input parameters are
+ * valid: specifically, the #blk_size of a bitmap allocator must be a
+ * power-of-two, and #base and #length must be aligned with #blk_size.
+ * Call nvgpu_alloc_common_init() to initialize the basic operations like
+ * the allocation, free and query operations for the respective allocator.
+ * Initialize some lists and locks to maintain the allocator objects.
+ *
+ * @return 0 in case of success, < 0 otherwise.
+ * @retval -EINVAL in case of incorrect input value:
+ *         - \a length is zero.
+ *         - \a blk_size is not a power of two.
+ *         - \a base or \a length is not aligned with \a blk_size.
+ * @retval -ENOMEM in case there is not enough memory for allocation.
  */
 int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
				const char *name, u64 base, u64 length,
@@ -439,11 +478,16 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
  * @param[in] base	Base address of page allocator.
  * @param[in] length	Size of page allocator.
  * @param[in] blk_size	Block size of page allocator.
- * @param[in] flags	Flags indicating page allocator conditions.
- *
- * @return 0 in case of success, < 0 otherwise.
- * @retval -EINVAL in case of incorrect input value.
- * @retval -ENOMEM in case there is not enough memory for allocation.
+ * @param[in] flags	Flags indicating page allocator conditions. Valid
+ *			flags are
+ *			- GPU_ALLOC_GVA_SPACE
+ *			- GPU_ALLOC_NO_ALLOC_PAGE
+ *			- GPU_ALLOC_4K_VIDMEM_PAGES
+ *			- GPU_ALLOC_FORCE_CONTIG
+ *			- GPU_ALLOC_NO_SCATTER_GATHER
+ * @return 0 in case of success, < 0 otherwise.
+ * @retval -EINVAL in case of incorrect input value.
+ * @retval -ENOMEM in case there is not enough memory for allocation.
  */
 int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
			      const char *name, u64 base, u64 length,
@@ -464,15 +508,53 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
  * @param[in] max_order	Max order of resource slices that can be
  *			allocated. Applicable to buddy allocator only.
  * @param[in] flags	Flags indicating additional conditions.
- * @param[in] alloc_type Allocator type.
-
- * Returns 0 in case of success, < 0 otherwise.
+ *			Valid flags are
+ *			- GPU_ALLOC_GVA_SPACE
+ *			- GPU_ALLOC_NO_ALLOC_PAGE
+ *			- GPU_ALLOC_4K_VIDMEM_PAGES
+ *			- GPU_ALLOC_FORCE_CONTIG
+ *			- GPU_ALLOC_NO_SCATTER_GATHER
+ * @param[in] alloc_type Allocator type. Valid types are
+ *			- BUDDY_ALLOCATOR
+ *			- PAGE_ALLOCATOR
+ *			- BITMAP_ALLOCATOR
+ *
+ * Call *allocator_init() to initialize the respective allocator.
+ *
+ * @return 0 in case of success, < 0 otherwise.
+ * @retval -EINVAL if the allocator type is not valid.
+ * @retval -EINVAL in case of incorrect input value:
+ *         - \a length is zero.
+ *         - \a blk_size is not a power of two.
+ *         - \a base or \a length is not aligned with \a blk_size.
+ * @retval -ENOMEM in case there is not enough memory for allocation.
  */
 int nvgpu_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
			 struct vm_gk20a *vm, const char *name,
			 u64 base, u64 length, u64 blk_size, u64 max_order,
			 u64 flags, enum nvgpu_allocator_type alloc_type);
 
+#ifdef CONFIG_NVGPU_FENCE
+/**
+ * @brief Initialize lockless allocator.
+ *
+ * @param[in] g		Pointer to GPU structure.
+ * @param[in] na	Pointer to allocator structure.
+ * @param[in] name	Name of lockless allocator.
+ * @param[in] base	Base address of lockless allocator.
+ * @param[in] length	Size of lockless allocator.
+ * @param[in] blk_size	Block size of lockless allocator.
+ * @param[in] flags	Flags indicating lockless allocator conditions.
+ *
+ * @return 0 in case of success, < 0 otherwise.
+ * @retval -EINVAL in case of incorrect input value.
+ * @retval -ENOMEM in case there is not enough memory for allocation.
+ */
+int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
				  const char *name, u64 base, u64 length,
				  u64 blk_size, u64 flags);
+#endif
+
 /**
  * Largest block of resources that fits in address space.
  */
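As a usage sketch for nvgpu_allocator_init() as declared above (editorial illustration; the base/length/blk_size values and the max_order choice are placeholders, not values taken from the driver):

#include <nvgpu/allocator.h>	/* assumed header path */

/* Editorial sketch: initialize a buddy-backed GVA allocator. */
static int init_gva_allocator_sketch(struct gk20a *g, struct vm_gk20a *vm,
				     struct nvgpu_allocator *na)
{
	return nvgpu_allocator_init(g, na, vm, "gva_space",
				    0x100000ULL,	/* base, placeholder */
				    0x40000000ULL,	/* length, 1 GiB */
				    0x10000ULL,		/* blk_size, 64 KiB */
				    0ULL,		/* max_order: 0 assumed to select the default */
				    GPU_ALLOC_GVA_SPACE,
				    BUDDY_ALLOCATOR);
}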
@@ -484,7 +566,16 @@ int nvgpu_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
  * @param[in] a	Pointer to nvgpu allocator.
  * @param[in] len	Size of allocation.
  *
- * @return Address of allocation in case of success, 0 otherwise.
+ * Invoke the underlying allocator's implementation of the alloc
+ * operation.
+ *
+ * @return Address of allocation in case of success, 0 otherwise.
+ * @retval 0 For failure; the reason can be one of the following:
+ *         - input parameters are not valid.
+ *         - there is no free space available in the allocator.
+ *         - zalloc failure due to out-of-memory conditions.
  */
 u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len);
 
@@ -495,7 +586,14 @@ u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len);
  * @param[in] len	Size of allocation.
  * @param[in] page_size	Page size of resource.
  *
- * @return Address of allocation in case of success, 0 otherwise.
+ * Invoke the underlying allocator's implementation of the alloc_pte
+ * operation.
+ *
+ * @return Address of allocation in case of success, 0 otherwise.
+ * @retval 0 For failure; the reason can be one of the following:
+ *         - input parameters are not valid.
+ *         - there is no free space available in the allocator.
  */
 u64 nvgpu_alloc_pte(struct nvgpu_allocator *a, u64 len, u32 page_size);
 
@@ -504,6 +602,11 @@ u64 nvgpu_alloc_pte(struct nvgpu_allocator *a, u64 len, u32 page_size);
  *
  * @param[in] a	Pointer to nvgpu allocator.
  * @param[in] addr	Base address of allocation.
+ *
+ * Invoke the underlying allocator's implementation of the free
+ * operation.
+ *
+ * @return None.
  */
 void nvgpu_free(struct nvgpu_allocator *a, u64 addr);
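A short usage sketch of the nvgpu_alloc()/nvgpu_free() pair declared above (editorial; the length is a placeholder and error handling is minimal):

/* Editorial sketch: allocate 64 KiB from an initialized allocator and
 * release it again. nvgpu_alloc() reports every failure as 0. */
static int alloc_free_roundtrip_sketch(struct nvgpu_allocator *na)
{
	u64 addr = nvgpu_alloc(na, 0x10000ULL);

	if (addr == 0ULL)
		return -ENOMEM;

	/* ... use the range [addr, addr + 0x10000) ... */

	nvgpu_free(na, addr);
	return 0;
}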
@@ -515,7 +618,13 @@ void nvgpu_free(struct nvgpu_allocator *a, u64 addr);
  * @param[in] len	Size of allocation.
  * @param[in] page_size	Page size of resource.
  *
- * @return Address of allocation in case of success, 0 otherwise.
+ * Invoke the underlying allocator's implementation of the alloc_fixed
+ * operation.
+ *
+ * @return Address of allocation in case of success.
+ * @retval 0 For failure, for any of the reasons below:
+ *         - invalid inputs.
+ *         - space unavailable to satisfy the request.
  */
 u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len,
		      u32 page_size);
@@ -526,6 +635,11 @@ u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len,
  * @param[in] a	Pointer to nvgpu allocator.
  * @param[in] base	Start address of resource.
  * @param[in] len	Size of allocation.
+ *
+ * Invoke the underlying allocator's implementation of the free_fixed
+ * operation.
+ *
+ * @return None.
  */
 void nvgpu_free_fixed(struct nvgpu_allocator *a, u64 base, u64 len);
 
@@ -535,7 +649,15 @@ void nvgpu_free_fixed(struct nvgpu_allocator *a, u64 base, u64 len);
  * @param[in] a	Pointer to nvgpu allocator.
  * @param[in] co	Pointer to carveout structure.
  *
- * @return 0 in case of success, < 0 in case of failure.
+ * Invoke the underlying allocator's implementation of the
+ * alloc_reserve_carveout operation.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ * @retval -EINVAL For invalid input parameters.
+ * @retval -EBUSY For the unavailability of the base of the
+ *         carveout.
+ * @retval -ENOMEM For the unavailability of memory for the carveout
+ *         object.
  */
 int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
				 struct nvgpu_alloc_carveout *co);
@@ -545,6 +667,11 @@ int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
  *
  * @param a	Pointer to nvgpu allocator.
  * @param co	Pointer to carveout structure.
+ *
+ * Invoke the underlying allocator's implementation of the release_carveout
+ * operation.
+ *
+ * @return None.
  */
 void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a,
				  struct nvgpu_alloc_carveout *co);
@@ -554,7 +681,9 @@ void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a,
  *
  * @param[in] a	Pointer to nvgpu allocator.
  *
- * @return Allocator start address.
+ * Invoke the underlying allocator's implementation of the base operation.
+ *
+ * @return Allocator start address.
  */
 u64 nvgpu_alloc_base(struct nvgpu_allocator *a);
 
@@ -563,7 +692,10 @@ u64 nvgpu_alloc_base(struct nvgpu_allocator *a);
  *
  * @param a	Pointer to nvgpu allocator.
  *
- * @return Allocator length address.
+ * Invoke the underlying allocator's implementation of the length
+ * operation.
+ *
+ * @return Allocator length.
  */
 u64 nvgpu_alloc_length(struct nvgpu_allocator *a);
 
@@ -572,7 +704,9 @@ u64 nvgpu_alloc_length(struct nvgpu_allocator *a);
  *
  * @param[in] a	Pointer to nvgpu allocator.
  *
- * @return Allocator end address.
+ * Invoke the underlying allocator's implementation of the end operation.
+ *
+ * @return Allocator end address.
  */
 u64 nvgpu_alloc_end(struct nvgpu_allocator *a);
 
@@ -581,7 +715,8 @@ u64 nvgpu_alloc_end(struct nvgpu_allocator *a);
  *
  * @param a	Pointer to nvgpu allocator.
  *
- * @return True if allocator is initialized, false otherwise.
+ *
+ * @return True if allocator is initialized, false otherwise.
  */
 bool nvgpu_alloc_initialized(struct nvgpu_allocator *a);
 
@@ -590,7 +725,11 @@ bool nvgpu_alloc_initialized(struct nvgpu_allocator *a);
  *
  * @param[in] a	Pointer to nvgpu allocator.
  *
- * @return Available allocator space.
+ * Invoke the underlying allocator's implementation of the space
+ * operation.
+ *
+ * @return Available allocator space.
+ *
  */
 u64 nvgpu_alloc_space(struct nvgpu_allocator *a);
 
@@ -598,6 +737,12 @@ u64 nvgpu_alloc_space(struct nvgpu_allocator *a);
  * @brief Interface to destroy allocator.
  *
  * @param[in] a	Pointer to nvgpu allocator.
+ *
+ * Invoke the underlying allocator's implementation of the destroy
+ * operation.
+ *
+ * @return None.
+ *
  */
 void nvgpu_alloc_destroy(struct nvgpu_allocator *a);
 
@@ -619,7 +764,7 @@ void nvgpu_alloc_print_stats(struct nvgpu_allocator *a,
  *
  * @param[in] a	Pointer to nvgpu allocator.
  *
- * @return GPU pointer.
+ * @return GPU pointer.
  */
 static inline struct gk20a *nvgpu_alloc_to_gpu(struct nvgpu_allocator *a)
 {
@@ -656,7 +801,9 @@ void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a);
  * @param[in] dbg	Debug flag.
  * @param[in] ops	Pointer to allocator operations.
  *
- * @return 0 in case of success, < 0 in case of failure.
+ * @return 0 in case of success, < 0 in case of failure.
+ * @retval -EINVAL If any of the inputs is NULL.
+ *
  */
 int nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
			    const char *name, void *priv, bool dbg,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/as.h b/drivers/gpu/nvgpu/include/nvgpu/as.h
index 17580f1ab..34ceba107 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/as.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/as.h
@@ -1,7 +1,7 @@
 /*
  * GK20A Address Spaces
  *
- * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -74,10 +74,18 @@ struct gk20a_as_share {
  *
  * @param as_share [in]	The address space share to release.
  *
- * Call gk20a_vm_release_share on the provided \a as_share and release the
- * corresponding share id.
+ * Release the address space share \a as_share that was created
+ * by gk20a_as_alloc_share().
+ *
+ * @return EOK in case of success, < 0 in case of failure.
+ *
+ * @retval -ENODEV If struct gk20a \a g is NULL.
+ * @retval -EINVAL If the power context associated with struct nvgpu_os_rmos
+ *         is NULL.
+ * @retval -EINVAL If the power function pointer associated with struct
+ *         nvgpu_module is NULL.
+ * @retval -EIO For clock-setting related failures.
  *
- * @return 0 in case of success, < 0 in case of failure.
  */
 int gk20a_as_release_share(struct gk20a_as_share *as_share);
 
@@ -98,8 +106,10 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share);
  *
  * @param g [in]		The GPU
  * @param big_page_size [in]	Big page size to use for the VM,
- *				set 0 for no big pages
- * @param flags [in]		NVGPU_AS_ALLOC_* flags
+ *				set 0 for the 64K big page size.
+ * @param flags [in]		NVGPU_AS_ALLOC_* flags. The flags are
+ *				NVGPU_AS_ALLOC_USERSPACE_MANAGED and
+ *				NVGPU_AS_ALLOC_UNIFIED_VA.
  * @param va_range_start [in]	Requested user managed memory start
  *				address, used to map buffers, save data
  *				should be aligned by PDE
@@ -112,11 +122,22 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share);
  *				structure
  *
  * Allocate the gk20a_as_share structure and the VM associated with it, based
- * on the provided \a big_page_size and NVGPU_AS_ALLOC_* \a flags.
+ * on the provided \a big_page_size and NVGPU_AS_ALLOC_* \a flags.
+ * Check the validity of \a big_page_size: it must be a power of two and
+ * within the range of big page sizes supported by the GPU.
  *
- * Notes: if \a big_page_size == 0, the default big page size is used.
+ * @note If \a big_page_size == 0, the default big page size (64K) is used.
+ * @note The \a flags is always set as NVGPU_AS_ALLOC_USERSPACE_MANAGED (AS
+ *       allocation flag for userspace managed).
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ *
+ * @retval -ENODEV If the GPU structure is NULL.
+ * @retval -EIO For clock-setting related failures.
+ * @retval -ENOMEM For memory allocation failures.
+ * @retval -EINVAL For any parameter check failures from
+ *         gk20a_vm_alloc_share().
+ * @retval -ENOMEM If the allocated VM is NULL.
  *
- * @return 0 in case of success, < 0 in case of failure.
  */
 int gk20a_as_alloc_share(struct gk20a *g, u32 big_page_size,
			 u32 flags, u64 va_range_start,
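The power-of-two check described for \a big_page_size above is simple bit math. An editorial sketch of such a validation (the set of supported sizes is a placeholder, not what the hardware actually reports):

#include <stdbool.h>
#include <stdint.h>

/* Editorial sketch: validate a big page size as the description above
 * requires - zero (default), or a power of two that the GPU supports.
 * Powers of two can be OR'd together, so the supported set is a mask. */
static bool big_page_size_valid_sketch(uint32_t big_page_size)
{
	const uint32_t supported = (64U << 10) | (128U << 10); /* placeholder */

	if (big_page_size == 0U)
		return true; /* 0 selects the default size */

	if ((big_page_size & (big_page_size - 1U)) != 0U)
		return false; /* not a power of two */

	return (big_page_size & supported) != 0U;
}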
diff --git a/drivers/gpu/nvgpu/include/nvgpu/dma.h b/drivers/gpu/nvgpu/include/nvgpu/dma.h
index d75efccd0..b74832b80 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/dma.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/dma.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -87,7 +87,18 @@ bool nvgpu_iommuable(struct gk20a *g);
  * Allocate memory suitable for doing DMA. Store the allocation info in #mem.
  * Returns 0 on success and a suitable error code when there's an error. This
  * memory can be either placed in VIDMEM or SYSMEM, which ever is more
- * convenient for the driver.
+ * convenient for the driver. The final memory allocation is done by an OS
+ * specific allocation routine.
+ *
+ * @note Since the Linux driver runs in kernel space, it uses the
+ *       dma_alloc_attrs() Linux API to allocate DMA-able memory.
+ *       QNX uses the memory service module to achieve the same.
+ * @note The parameter flags is always zero for nvgpu_dma_alloc_flags().
+ *
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
+ *
  */
 int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
 
@@ -96,6 +107,9 @@ int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  *
  * @param g     - The GPU.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                The following flags are accepted:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
+ *                - %NVGPU_DMA_READ_ONLY
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
@@ -104,10 +118,9 @@ int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  * memory can be either placed in VIDMEM or SYSMEM, which ever is more
  * convenient for the driver.
  *
- * The following flags are accepted:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- * %NVGPU_DMA_READ_ONLY
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
  */
 int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
			  struct nvgpu_mem *mem);
@@ -122,6 +135,10 @@ int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
  * Allocate memory suitable for doing DMA. Store the allocation info in #mem.
  * Returns 0 on success and a suitable error code when there's an error. This
  * allocates memory specifically in SYSMEM.
+ *
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
  */
 int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
 
@@ -130,6 +147,9 @@ int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  *
  * @param g     - The GPU.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                The following flags are accepted:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
+ *                - %NVGPU_DMA_READ_ONLY
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
@@ -137,10 +157,9 @@ int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  * Returns 0 on success and a suitable error code when there's an error. This
  * allocates memory specifically in SYSMEM.
  *
- * The following flags are accepted:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- * %NVGPU_DMA_READ_ONLY
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
  */
 int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
			      size_t size, struct nvgpu_mem *mem);
@@ -164,6 +183,7 @@ int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  *
  * @param g     - The GPU.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING is the only accepted flag.
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
@@ -171,10 +191,6 @@ int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  * Returns 0 on success and a suitable error code when there's an error. This
  * allocates memory specifically in VIDMEM.
  *
- * Only the following flags are accepted:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- *
  */
 int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags,
			      size_t size, struct nvgpu_mem *mem);
@@ -202,6 +218,8 @@ int nvgpu_dma_alloc_vid_at(struct gk20a *g,
  *
  * @param g     - The GPU.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                Only the following flags are accepted:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  * @param at    - A specific location to attempt to allocate memory from or 0 if
 *                the caller does not care what the address is.
 *
@@ -211,9 +229,6 @@ int nvgpu_dma_alloc_vid_at(struct gk20a *g,
  * Returns 0 on success and a suitable error code when there's an error. This
  * allocates memory specifically in VIDMEM.
  *
- * Only the following flags are accepted:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
  */
 int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
				 size_t size, struct nvgpu_mem *mem, u64 at);
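A hedged usage sketch for the flagged SYSMEM allocation path documented above (editorial; the size is a placeholder and error handling is trimmed):

#include <nvgpu/dma.h>	/* assumed header path */

/* Editorial sketch: allocate sysmem without a CPU kernel mapping,
 * then free it again with nvgpu_dma_free(). */
static int dma_alloc_sketch(struct gk20a *g)
{
	struct nvgpu_mem mem = { 0 };
	int err;

	err = nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_NO_KERNEL_MAPPING,
					0x10000U, &mem);
	if (err != 0)
		return err;

	/* ... hand mem to the GMMU or hardware ... */

	nvgpu_dma_free(g, &mem);
	return 0;
}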
@@ -234,6 +249,8 @@ int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
  * nvgpu_dma_alloc_vid()
  * nvgpu_dma_alloc_flags_vid()
  * nvgpu_dma_alloc_flags_vid_at()
+ *
+ * @return None.
  */
 void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem);
 
@@ -253,6 +270,11 @@ void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem);
  * cannot use nvgpu_gmmu_map() on said buffer - it will overwrite the necessary
  * information for the DMA unmap routines to actually unmap the buffer. You
  * will either leak mappings or see GMMU faults.
+ *
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
+ * @retval -ENOMEM For failure in GPU mapping.
  */
 int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
			struct nvgpu_mem *mem);
@@ -262,6 +284,9 @@ int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
  *
  * @param vm    - VM context for GMMU mapping.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                Accepted flags are:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
+ *                - %NVGPU_DMA_READ_ONLY
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
@@ -270,11 +295,10 @@ int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
  * either placed in VIDMEM or SYSMEM, which ever is more convenient for the
  * driver.
  *
- * This version passes #flags on to the underlying DMA allocation. The accepted
- * flags are:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- * %NVGPU_DMA_READ_ONLY
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
+ * @retval -ENOMEM For failure in GPU mapping.
  */
 int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
			      size_t size, struct nvgpu_mem *mem);
@@ -288,6 +312,11 @@ int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
  *
  * Allocate memory suitable for doing DMA and map that memory into the GMMU.
  * This memory will be placed in SYSMEM.
+ *
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
+ * @retval -ENOMEM For failure in GPU mapping.
  */
 int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size,
			    struct nvgpu_mem *mem);
@@ -297,17 +326,19 @@ int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size,
  *
  * @param vm    - VM context for GMMU mapping.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                Accepted flags are:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
+ *                - %NVGPU_DMA_READ_ONLY
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
  * Allocate memory suitable for doing DMA and map that memory into the GMMU.
  * This memory will be placed in SYSMEM.
  *
- * This version passes #flags on to the underlying DMA allocation. The accepted
- * flags are:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- * %NVGPU_DMA_READ_ONLY
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
+ * @retval -ENOMEM For failure in GPU mapping.
  */
 int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
				  size_t size, struct nvgpu_mem *mem);
@@ -331,24 +362,22 @@ int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size,
  *
  * @param vm    - VM context for GMMU mapping.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                Accepted flags are:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
+ *                - %NVGPU_DMA_READ_ONLY
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
  * Allocate memory suitable for doing DMA and map that memory into the GMMU.
  * This memory will be placed in VIDMEM.
  *
- * This version passes #flags on to the underlying DMA allocation. The accepted
- * flags are:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- * %NVGPU_DMA_READ_ONLY
 */
 int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
				  size_t size, struct nvgpu_mem *mem);
 #endif /* CONFIG_NVGPU_DGPU */
 
 /**
- * nvgpu_dma_unmap_free - Free a DMA allocation
+ * nvgpu_dma_unmap_free - Free a DMA allocation and unmap the GPU VA.
  *
  * @param g   - The GPU.
  * @param mem - An allocation to free.
@@ -361,6 +390,9 @@ int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
  * nvgpu_dma_alloc_map_flags_sys()
  * nvgpu_dma_alloc_map_vid()
  * nvgpu_dma_alloc_map_flags_vid()
+ *
+ * @return None.
+ *
  */
 void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem);
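The alloc-map routines above pair with nvgpu_dma_unmap_free(); a minimal editorial sketch of the round trip (placeholder size, and the gpu_va field of struct nvgpu_mem is an assumption about where the mapping is recorded):

/* Editorial sketch: allocate + GMMU-map in one step, then undo both.
 * Per the warning in the docs above, nvgpu_gmmu_map() must not be used
 * on this buffer - it would clobber the unmap bookkeeping. */
static int alloc_map_roundtrip_sketch(struct vm_gk20a *vm)
{
	struct nvgpu_mem mem = { 0 };
	int err = nvgpu_dma_alloc_map(vm, 0x1000U, &mem);

	if (err != 0)
		return err;

	/* ... program mem.gpu_va (assumed field) into hardware ... */

	nvgpu_dma_unmap_free(vm, &mem);
	return 0;
}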
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 670509ddb..5d08de122 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -229,11 +229,11 @@ static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
  * @param vm [in]	Pointer to virtual memory structure.
  *
  * Init Page Table:
- * - Allocates the DMA memory for a page directory.
- *   This handles the necessary PD cache logistics. Since on Parker and
- *   later GPUs some of the page directories are smaller than a page packing
- *   these PDs together saves a lot of memory.
- *   #nvgpu_pd_alloc() does the pd cache allocation.
+ * Allocates the DMA memory for a page directory.
+ * This handles the necessary PD cache logistics. Since on Parker and
+ * later GPUs some of the page directories are smaller than a page, packing
+ * these PDs together saves a lot of memory.
+ * #nvgpu_pd_alloc() does the pd cache allocation.
  *
  * PDB size here must be at least 4096 bytes so that its address is 4K
  * aligned. Although lower PDE tables can be aligned at 256B boundaries
@@ -242,47 +242,55 @@ static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
  * Currently NVGPU_CPU_PAGE_SIZE is used, even when 64K, to work around an issue
  * with the PDB TLB invalidate code not being pd_cache aware yet.
  *
- * @return 0 in case of success.
- *         -ENOMEM (< 0) in case of failure.
+ * @return 0 in case of success.
+ * @retval -ENOMEM For any allocation failures from kzalloc and dma_alloc
+ *         functions.
  */
 int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm);
 
 /**
- * @brief Map memory into the GMMU.
+ * @brief Map memory into the GMMU. This is required to allow a particular
+ *        context on the GR or CE engines to access the given virtual
+ *        address.
  *
  * @param vm [in]	Pointer to virtual memory structure.
- * @param mem [in]	Structure for storing the memory informati on.
+ * @param mem [in]	Structure for storing the memory information.
  * @param size [in]	Size of the buffer in bytes.
  * @param flags [in]	Mapping flags.
  *			- Min: NVGPU_VM_MAP_FIXED_OFFSET
  *			- Max: NVGPU_VM_MAP_PLATFORM_ATOMIC
  * @param rw_flag [in]	Flag designates the requested GMMU mapping.
+ *			- Min: gk20a_mem_flag_none
+ *			- Max: gk20a_mem_flag_write_only
  * @param priv [in]	True if the mapping should be Privileged.
  * @param aperture [in]	Where the memory actually was allocated from.
+ *			- Min: APERTURE_SYSMEM
+ *			- Max: APERTURE_VIDMEM
  *
  * Core GMMU map function for the nvgpu to use. The GPU VA will be
  * allocated for client.
  *
  * GMMU Map:
- * - Retrives the nvgpu_sgt which contains the memory handle information.
- * - Acquires the VM GMMU lock to the avoid race.
- * - Decodes the Mapping flags, rw_flag, priv and aperture for GMMU mapping.
- * - Allocates a new GPU VA range for a specific size.
- *   #nvgpu_vm_alloc_va() reserves the GPU VA.
- * - Program PDE and PTE entry with PA/IPA, mapping flags, rw_flag and aperture
- *   information. #nvgpu_gmmu_update_page_table does the pde and pte updates.
- * - Chip specific stuff is handled at the PTE/PDE programming HAL layer.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b, gp10b).
- * - Invalidates the GPU TLB, gm20b_fb_tlb_invalidate does the tlb invalidate.
- * - Release the VM GMMU lock.
+ * Retrieves the nvgpu_sgt which contains the memory handle information.
+ * Acquires the VM GMMU lock to avoid races.
+ * Decodes the mapping flags, rw_flag, priv and aperture for the GMMU mapping.
+ * Allocates a new GPU VA range for a specific size. #nvgpu_vm_alloc_va()
+ * reserves the GPU VA.
+ * Programs the PDE and PTE entries with PA/IPA, mapping flags, rw_flag and
+ * aperture information. #nvgpu_gmmu_update_page_table() does the pde and pte
+ * updates.
+ * Chip specific stuff is handled at the PTE/PDE programming HAL layer. GMMU
+ * level entry format will be different for each GPU family (i.e., gv11b,
+ * gp10b).
+ * Internally nvgpu_set_pd_level() programs the different levels of the page
+ * table.
+ * Invalidates the GPU TLB; gm20b_fb_tlb_invalidate does the tlb invalidate.
+ * Releases the VM GMMU lock.
+ *
+ * @return valid GMMU VA start address in case of success.
+ * @retval 0 in case of all possible failures.
+ *         Possible failure cases:
+ *         - Memory handle is invalid.
+ *         - No free GPU VA space (GPU VA space full).
+ *         - TLB invalidate timeout.
+ *         - Invalid inputs.
  *
- * @return valid GMMU VA start address in case of success.
- *         0 in case of all possible failures.
- *         Possible Failure cases:
- *         - Memory handle is invalid.
- *         - No free GPU VA space (GPU VA space full).
- *         - TLB invalidate timeout.
  */
 u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
		   struct nvgpu_mem *mem,
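A usage sketch of nvgpu_gmmu_map() as documented above. The full parameter list is abbreviated in this patch, so treat the exact call shape (and the unmap signature) as assumptions:

/* Editorial sketch: map a sysmem buffer with default (non-fixed) VA
 * placement, then unmap it. Flag values come from the doc above. */
static u64 gmmu_map_sketch(struct vm_gk20a *vm, struct nvgpu_mem *mem,
			   u64 size)
{
	u64 gpu_va = nvgpu_gmmu_map(vm, mem, size,
				    0U,			/* no special map flags */
				    gk20a_mem_flag_none,	/* read-write */
				    false,		/* not privileged */
				    APERTURE_SYSMEM);

	if (gpu_va == 0ULL)
		return 0ULL; /* every failure reports as 0 */

	/* ... use gpu_va ... */

	nvgpu_gmmu_unmap(vm, mem, gpu_va);
	return gpu_va;
}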
@@ -293,7 +301,9 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
 /**
- * @brief Map memory into the GMMU at a fixed address.
+ * @brief Map memory into the GMMU at a fixed address. This is required to
+ *        allow a particular context on the GR or CE engines to access the
+ *        given virtual address.
  *
  * @param vm [in]	Pointer to virtual memory structure.
  * @param mem [in]	Structure for storing the memory information.
  * @param size [in]	Size of the buffer in bytes.
  * @param gpu_va [in]	GPU virtual address.
@@ -303,28 +313,33 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
  * @param flags [in]	Mapping flags.
  *			- Min: NVGPU_VM_MAP_FIXED_OFFSET
  *			- Max: NVGPU_VM_MAP_PLATFORM_ATOMIC
  * @param rw_flag [in]	Flag designates the requested GMMU mapping.
+ *			- Min: gk20a_mem_flag_none
+ *			- Max: gk20a_mem_flag_write_only
  * @param priv [in]	True if the mapping should be Privileged.
  * @param aperture [in]	Where the memory actually was allocated from.
- *
+ *			- Min: APERTURE_SYSMEM
+ *			- Max: APERTURE_VIDMEM
  *
  * GMMU Map at a fixed address:
- * - Retrives the nvgpu_sgt which contains the memory handle information.
- * - Acquires the VM GMMU lock to the avoid race.
- * - Decodes the Mapping flags, rw_flag, priv and aperture for GMMU mapping.
- * - Program PDE and PTE entry with PA/IPA, mapping flags, rw_flag and aperture
- *   information. #nvgpu_gmmu_update_page_table does the pde and pte updates.
- * - Chip specific stuff is handled at the PTE/PDE programming HAL layer.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b, gp10b).
- * - Invalidates the GPU TLB, gm20b_fb_tlb_invalidate does the tlb invalidate.
- * - Release the VM GMMU lock.
+ * Retrieves the nvgpu_sgt which contains the memory handle information.
+ * Acquires the VM GMMU lock to avoid races.
+ * Decodes the mapping flags, rw_flag, priv and aperture for the GMMU mapping.
+ * Programs the PDE and PTE entries with PA/IPA, mapping flags, rw_flag and
+ * aperture information. #nvgpu_gmmu_update_page_table does the pde and pte
+ * updates.
+ * Chip specific stuff is handled at the PTE/PDE programming HAL layer.
+ * GMMU level entry format will be different for each GPU family (i.e.,
+ * gv11b, gp10b).
+ * Internally nvgpu_set_pd_level() is called to program the different levels
+ * of the page table.
+ * Invalidates the GPU TLB; gm20b_fb_tlb_invalidate does the tlb invalidate.
+ * Releases the VM GMMU lock.
  *
- * @return valid GMMU VA start address in case of success.
- *         0 in case of all possible failures.
- *         Possible Failure cases:
- *         - Memory handle is invalid.
- *         - No free GPU VA space at @addr passed by client.
- *         - TLB invalidate timeout.
+ * @return valid GMMU VA start address in case of success.
+ * @retval 0 in case of all possible failures.
+ *         Possible failure cases:
+ *         - Memory handle is invalid.
+ *         - No free GPU VA space at @addr passed by client.
+ *         - TLB invalidate timeout.
+ *         - Invalid inputs.
  */
 u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
			 struct nvgpu_mem *mem,
@@ -337,6 +352,7 @@ u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
 /**
  * @brief Unmap a memory mapped by nvgpu_gmmu_map()/nvgpu_gmmu_map_fixed().
+ *        This is required to remove the translations from the GPU page table.
  *
  * @param vm [in]	Pointer to virtual memory structure.
  * @param mem [in]	Structure for storing the memory information.
@@ -345,19 +361,19 @@ u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
  * Core GMMU unmap function for the nvgpu to use.
  *
  * GMMU Unmap:
- * - Acquires the VM GMMU lock to the avoid race.
- * - Free the reserved GPU VA space staring at @gpu_va.
- *   #nvgpu_vm_free_va does free the GPU VA space.
- * - Program PDE and PTE entry with default information which is internally
- *   frees up the GPU VA space.
- * - Chip specific stuff is handled at the PTE/PDE programming HAL layer.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b).
- * - Flush the GPU L2. gv11b_mm_l2_flush does the L2 flush.
- * - Invalidates the GPU TLB, gm20b_fb_tlb_invalidate does the tlb invalidate.
- * - Release the VM GMMU lock.
+ * Acquires the VM GMMU lock to avoid races.
+ * Frees the reserved GPU VA space starting at @gpu_va.
+ * #nvgpu_vm_free_va does free the GPU VA space.
+ * Programs the PDE and PTE entries with default information, which
+ * internally frees up the GPU VA space.
+ * Chip specific stuff is handled at the PTE/PDE programming HAL layer.
+ * GMMU level entry format will be different for each GPU family
+ * (i.e., gv11b).
+ * Flushes the GPU L2. gv11b_mm_l2_flush does the L2 flush.
+ * Invalidates the GPU TLB; gm20b_fb_tlb_invalidate does the tlb invalidate.
+ * Releases the VM GMMU lock.
  *
- * @return None.
+ * @return None.
  */
 void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
		      struct nvgpu_mem *mem,
@@ -369,14 +385,13 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
  * @param g [in]	The GPU.
  *
  * Compute number of words in a PTE:
- * - Iterate to the PTE level. The levels array is always NULL terminated.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b).
+ * Iterate to the PTE level. The levels array is always NULL terminated.
+ * GMMU level entry format will be different for each GPU family
+ * (i.e., gv11b).
  *
  * This computes and returns the size of a PTE for the passed chip.
  *
- * @return number of words in a PTE in case of success.
- *         0 in case failure.
+ * @return number of words in a PTE in case of success.
  */
 u32 nvgpu_pte_words(struct gk20a *g);
 
@@ -389,15 +404,16 @@ u32 nvgpu_pte_words(struct gk20a *g);
  * @param pte [out]	Set to the contents of the PTE.
  *
  * Get the contents of a PTE:
- * - Find a PTE in the passed VM based on the passed GPU virtual address. This
- *   will @pte with a copy of the contents of the PTE. @pte must be an array of
- *   u32s large enough to contain the PTE. This can be computed using
- *   nvgpu_pte_words().
+ * Find a PTE in the passed VM based on the passed GPU virtual address. This
+ * will fill @pte with a copy of the contents of the PTE. @pte must be an
+ * array of u32s large enough to contain the PTE. The required size can be
+ * computed using nvgpu_pte_words().
  *
  * If you wish to write to this PTE then you may modify @pte and then use the
  * nvgpu_set_pte().
  *
- * @return 0 if the PTE is found and -EINVAL otherwise.
+ * @return 0 if the PTE is found.
+ * @retval -EINVAL If any of the internal computations fail.
  */
 int nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
 
@@ -410,17 +426,18 @@ int nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
  * @param pte [in]	The contents of the PTE to write.
  *
  * Set the contents of a PTE:
- * - Find a PTE and overwrite the contents of that PTE with the passed in data
- *   located in @pte. If the PTE does not exist then no writing will happen.
- *   That is this function will not fill out the page tables for you.
- *   The expectation is that the passed @vaddr has already been mapped and
- *   this is just modifying the mapping
- *   (for instance changing invalid to valid).
+ * Find a PTE and overwrite the contents of that PTE with the passed in data
+ * located in @pte by calling nvgpu_locate_pte(). If the PTE does not exist
+ * then no writing will happen. That is, this function will not fill out the
+ * page tables for you. The expectation is that the passed @vaddr has already
+ * been mapped and this is just modifying the mapping (for instance changing
+ * invalid to valid).
  *
  * @pte must contain at least the required words for the PTE. See
  * nvgpu_pte_words().
  *
- * @return 0 on success and -EINVAL otherwise.
+ * @return 0 on success.
+ * @retval -EINVAL for failure.
  */
 int nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
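The get/set pair above is a read-modify-write API over a single PTE; all three signatures are in this header. An editorial sketch of the pattern (the bit being cleared is purely illustrative, not the real PTE layout):

/* Editorial sketch: fetch a PTE, tweak it, write it back. */
static int pte_rmw_sketch(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr)
{
	u32 pte[8]; /* large enough for the sketch; verified below */
	int err;

	if (nvgpu_pte_words(g) > 8U)
		return -EINVAL;

	err = nvgpu_get_pte(g, vm, vaddr, pte);
	if (err != 0)
		return err;

	pte[0] &= ~1U; /* illustrative only: clear an assumed "valid" bit */

	/* No page tables are created here: vaddr must already be mapped. */
	return nvgpu_set_pte(g, vm, vaddr, pte);
}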
@@ -431,47 +448,53 @@ int nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
 /**
  * @brief Mutex Locked version of map memory routine.
  *
- * @param vm [in]	Pointer to virtual memory structure.
- * @param vaddr [in]	GPU virtual address.
- * @param sgt [in]	Pointer to scatter gather table for
- *			direct "physical" nvgpu_mem structures.
+ * @param vm [in]		Pointer to virtual memory structure.
+ * @param vaddr [in]		GPU virtual address.
+ * @param sgt [in]		Pointer to scatter gather table for
+ *				direct "physical" nvgpu_mem structures.
  * @param buffer_offset [in]	Offset address from start of the memory.
- * @param size [in]	Size of the buffer in bytes.
- * @param pgsz_idx [in]	Index into the page size table.
- *			- Min: GMMU_PAGE_SIZE_SMALL
- *			- Max: GMMU_PAGE_SIZE_KERNEL
- * @param kind_v [in]	Kind attributes for mapping.
+ * @param size [in]		Size of the buffer in bytes.
+ * @param pgsz_idx [in]		Index into the page size table.
+ *				- Min: GMMU_PAGE_SIZE_SMALL
+ *				- Max: GMMU_PAGE_SIZE_KERNEL
+ * @param kind_v [in]		Kind attributes for mapping.
  * @param ctag_offset [in]	Size of the buffer in bytes.
  * @param flags [in]	Mapping flags.
  *			- Min: NVGPU_VM_MAP_FIXED_OFFSET
  *			- Max: NVGPU_VM_MAP_PLATFORM_ATOMIC
- * @param rw_flag [in]	Flag designates the requested GMMU mapping.
+ * @param rw_flag [in]		Flag designates the requested GMMU mapping.
+ *				- Min: gk20a_mem_flag_none
+ *				- Max: gk20a_mem_flag_write_only
  * @param clear_ctags [in]	True if ctags clear is required.
- * @param sparse [in]	True if the mapping should be sparse.
- * @param priv [in]	True if the mapping should be Privileged.
- * @param batch [in]	Mapping_batch handle. Structure which track
- *			whether the L2 flush and TLB invalidate is
- *			required or not during map/unmap.
- * @param aperture [in]	Where the memory actually was allocated from.
+ * @param sparse [in]		True if the mapping should be sparse.
+ * @param priv [in]		True if the mapping should be Privileged.
+ * @param batch [in]		Mapping_batch handle. Structure which tracks
+ *				whether the L2 flush and TLB invalidate are
+ *				required or not during map/unmap.
+ * @param aperture [in]		Where the memory actually was allocated from.
+ *				- Min: APERTURE_SYSMEM
+ *				- Max: APERTURE_VIDMEM
  *
  * Native GPU "HAL" functions for GMMU Map.
  *
  * Locked version of GMMU Map routine:
- * - Decodes the Mapping flags, rw_flag, priv and aperture for GMMU mapping.
- * - Allocates a new GPU VA range for a specific size if vaddr is 0.
- *   #nvgpu_vm_alloc_va() reserves the GPU VA.
- * - Program PDE and PTE entry with PA/IPA, mapping flags, rw_flag and aperture
- *   information. #nvgpu_gmmu_update_page_table does the pde and pte updates.
- * - Chip specific stuff is handled at the PTE/PDE programming HAL layer.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b, gp10b).
- * - Invalidates the GPU TLB, gm20b_fb_tlb_invalidate does the tlb invalidate.
+ * Decodes the mapping flags, rw_flag, priv and aperture for the GMMU mapping.
+ * Allocates a new GPU VA range for a specific size if vaddr is 0.
+ * #nvgpu_vm_alloc_va() reserves the GPU VA.
+ * Programs the PDE and PTE entries with PA/IPA, mapping flags, rw_flag and
+ * aperture information. #nvgpu_gmmu_update_page_table does the pde and pte
+ * updates.
+ * Chip specific stuff is handled at the PTE/PDE programming HAL layer.
+ * GMMU level entry format will be different for each GPU family
+ * (i.e., gv11b, gp10b).
+ * Invalidates the GPU TLB; gm20b_fb_tlb_invalidate does the tlb invalidate.
  *
- * @return valid GMMU VA start address in case of success.
- *         0 in case of all possible failures.
- *         Possible Failure cases:
- *         - No free GPU VA space (GPU VA space full).
- *         - TLB invalidate timeout.
+ * @return valid GMMU VA start address in case of success.
+ * @retval 0 in case of all possible failures.
+ *         Possible failure cases:
+ *         - No free GPU VA space (GPU VA space full).
+ *         - TLB invalidate timeout.
+ *         - Any invalid input parameters.
+ *         - Failure inside any of the called functions.
  */
 u64 nvgpu_gmmu_map_locked(struct vm_gk20a *vm,
			  u64 vaddr,
@@ -500,6 +523,8 @@ u64 nvgpu_gmmu_map_locked(struct vm_gk20a *vm,
  *			- Max: GMMU_PAGE_SIZE_KERNEL
  * @param va_allocated [in]	Indicates if gpu_va address is valid/allocated.
  * @param rw_flag [in]	Flag designates the requested GMMU mapping.
+ *			- Min: gk20a_mem_flag_none
+ *			- Max: gk20a_mem_flag_write_only
  * @param sparse [in]	True if the mapping should be sparse.
  * @param batch [in]	Mapping_batch handle. Structure which track
  *			whether the L2 flush and TLB invalidate is
@@ -508,17 +533,17 @@ u64 nvgpu_gmmu_map_locked(struct vm_gk20a *vm,
  * Native GPU "HAL" functions for GMMU Unmap.
  *
  * Locked version of GMMU Unmap routine:
- * - Free the reserved GPU VA space staring at @gpu_va.
- *   #nvgpu_vm_free_va does free the GPU VA space.
- * - Program PDE and PTE entry with default information which is internally
- *   frees up the GPU VA space.
- * - Chip specific stuff is handled at the PTE/PDE programming HAL layer.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b).
- * - Flush the GPU L2. gv11b_mm_l2_flush does the L2 flush.
- * - Invalidates the GPU TLB, gm20b_fb_tlb_invalidate does the tlb invalidate.
+ * Frees the reserved GPU VA space starting at \a gpu_va.
+ * #nvgpu_vm_free_va does free the GPU VA space.
+ * Programs the PDE and PTE entries with default information, which
+ * internally frees up the GPU VA space.
+ * Chip specific stuff is handled at the PTE/PDE programming HAL layer.
+ * GMMU level entry format will be different for each GPU family
+ * (i.e., gv11b).
+ * Flushes the GPU L2. gv11b_mm_l2_flush does the L2 flush.
+ * Invalidates the GPU TLB; gm20b_fb_tlb_invalidate does the tlb invalidate.
  *
- * @return None.
+ * @return None.
  */
 void nvgpu_gmmu_unmap_locked(struct vm_gk20a *vm,
			     u64 vaddr,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h b/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
index 16b166775..29bbc456e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -83,16 +83,20 @@ struct nvgpu_gmmu_pd {
  * @param bytes [in]	PD size.
  *
  * Allocates a page directory:
- * - Allocates the DMA memory for a page directory.
- *   This handles the necessary PD cache logistics. Since Parker and
- *   later GPUs, some of the page directories are smaller than a page.
- *   Hence, packing these PDs together saves a lot of memory.
- * - If PD is bigger than a page just do a regular DMA alloc.
- *   #nvgpu_pd_cache_alloc_direct() does the pd cache allocation.
+ * Allocates the DMA memory for a page directory.
+ * This handles the necessary PD cache logistics. Since Parker and
+ * later GPUs, some of the page directories are smaller than a page.
+ * Hence, packing these PDs together saves a lot of memory.
+ * If the PD is bigger than a page, just do a regular DMA alloc.
+ * #nvgpu_pd_cache_alloc_direct() does the pd cache allocation.
 *
 *
- * @return 0 in case of success.
- *         -ENOMEM (< 0) in case of failure.
+ * @return 0 in case of success.
+ * @retval -ENOMEM in case of failure. The reason can be any one
+ *         of the following:
+ *         - kzalloc failure.
+ *         - failures internal to the dma_alloc* functions.
+ *
 */
 int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
 
@@ -103,10 +107,11 @@ int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
  * @param pd [in]	Pointer to pd_cache memory structure.
  *
  * Free the Page Directory DMA memory:
- * - Free the DMA memory allocated using nvgpu_pd_alloc.
- *   #nvgpu_pd_cache_free_direct() frees the pd cache.
+ * Free the DMA memory allocated using nvgpu_pd_alloc() by
+ * calling #nvgpu_pd_cache_free_direct().
+ * Call #nvgpu_pd_cache_free() if the pd is cached.
 *
- * @return None
+ * @return None.
 */
 void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
 
@@ -116,11 +121,13 @@ void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
  * @param g [in]	The GPU.
  *
  * Initialize the pd_cache:
- * - Allocates the zero initialized memory area for #nvgpu_pd_cache.
- * - Initializes the mutexes and list nodes for pd_cache tracking stuff.
+ * Allocates the zero initialized memory area for #nvgpu_pd_cache.
+ * Initializes the mutexes and list nodes for pd_cache tracking stuff.
+ * Checks mm.pd_cache to make sure the pd_cache is not initialized
+ * twice.
 *
- * @return 0 in case of success.
- *         -ENOMEM (< 0) in case of failure.
+ * @return 0 in case of success.
+ * @retval -ENOMEM in case of kzalloc failure.
 */
 int nvgpu_pd_cache_init(struct gk20a *g);
 
@@ -130,11 +137,12 @@ int nvgpu_pd_cache_init(struct gk20a *g);
  * @param g [in]	The GPU.
  *
  * Free the pd_cache:
- * - Reset the list nodes used for pd_cache tracking stuff.
- * - Free the #nvgpu_pd_cache internal structure allocated
- *   by nvgpu_pd_cache_init().
+ * Reset the list nodes used for pd_cache tracking stuff.
+ * Free the #nvgpu_pd_cache internal structure allocated
+ * by nvgpu_pd_cache_init().
+ * Reset mm.pd_cache to NULL.
 *
- * @return None
+ * @return None.
 */
 void nvgpu_pd_cache_fini(struct gk20a *g);
 
@@ -149,10 +157,10 @@ void nvgpu_pd_cache_fini(struct gk20a *g);
  *			- Max: GMMU_PAGE_SIZE_KERNEL
  *
  * Compute the pd offset:
- * - ((@pd_idx * GMMU level entry size / 4).
+ * (@pd_idx * GMMU level entry size) / 4.
 *
- * @return valid pd offset in case of valid @pd_idx.
- *         Invalid pd offset in case of invalid/random @pd_idx.
+ * @return pd offset at \a pd_idx.
+ *
 */
 u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx);
 
@@ -165,10 +173,10 @@ u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx);
  * @param data [in]	Data to write into pd mem.
  *
  * Write data content into pd mem:
- * - Offset = ((start address of the pd / 4 + @w).
- * - Write data content into offset address.
+ * Offset = (start address of the pd) / 4 + @w.
+ * Write the data content to the offset address by calling #nvgpu_mem_wr32().
 *
- * @return None
+ * @return None.
 */
 void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
		    size_t w, u32 data);
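The two offset formulas above are plain word arithmetic. An editorial sketch with an assumed entry size of 8 bytes, i.e. two 32-bit words per entry (the real size comes from the gk20a_mmu_level table):

#include <stdint.h>

#define ENTRY_SIZE_BYTES 8U /* placeholder; chip-specific in practice */

/* Word offset of PD entry @pd_idx: (pd_idx * entry size) / 4. */
static uint32_t pd_offset_sketch(uint32_t pd_idx)
{
	return (pd_idx * ENTRY_SIZE_BYTES) / 4U;
}

A caller would then combine this with nvgpu_pd_write(), e.g. nvgpu_pd_write(g, pd, (size_t)(pd_offset_sketch(idx) + word), data) to store one word of an entry.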
@@ -180,15 +188,30 @@ void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
  * @param pd [in]	Pointer to GMMU page directory structure.
  *
  * Write data content into pd mem:
- * - Return the _physical_ address of a page directory for GMMU programming.
- * - PD base in context inst block.
- *   #nvgpu_mem_get_addr returns the _physical_ address of pd mem.
+ * Return the _physical_ address of a page directory for GMMU programming,
+ * e.g. the PD base in the context inst block.
+ * #nvgpu_mem_get_addr returns the _physical_ address of pd mem.
 *
- * @return valid pd physical address in case of valid pd mem.
- *         Invalid pd physical address in case of invalid/random pd mem.
+ * @return pd physical address in case of valid pd mem.
+ * @retval Zero in case of invalid/random pd mem.
 */
 u64 nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
 
+/**
+ * @brief Allocate memory for a page directory.
+ *
+ * @param g [in]	The GPU.
+ * @param pd [in]	Pointer to GMMU page directory structure.
+ * @param bytes [in]	PD size in bytes.
+ *
+ * - Set NVGPU_DMA_PHYSICALLY_ADDRESSED if \a bytes is more than
+ *   NVGPU_CPU_PAGE_SIZE.
+ * - Call #nvgpu_dma_alloc_flags() to allocate DMA-able memory for
+ *   the pd.
+ *
+ * @return 0 in case of success.
+ * @retval -ENOMEM For any allocation failure.
+ */
 int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
				struct nvgpu_gmmu_pd *pd, u32 bytes);
+
 #endif