From 73f07366c3f34885ecaf4f33a4a094aa1c046c8f Mon Sep 17 00:00:00 2001
From: dt
Date: Wed, 20 Jan 2021 11:25:37 +0000
Subject: [PATCH] gpu: nvgpu: Add doxygen update for common mm unit

Add doxygen comments for the common mm unit, including return
values, descriptions and some format changes.

JIRA NVGPU-6381

Change-Id: Ibbe1af5b9e4356bf02bb591116e08735ce77b323
Signed-off-by: dt
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2472907
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-misra
Reviewed-by: svc-mobile-cert
Reviewed-by: Lakshmanan M
Reviewed-by: Vaibhav Kachore
Reviewed-by: mobile promotions
Tested-by: mobile promotions
GVS: Gerrit_Virtual_Submit
---
 .../common/mm/allocators/bitmap_allocator.c |  10 +-
 .../common/mm/allocators/buddy_allocator.c  |  46 +++-
 drivers/gpu/nvgpu/include/nvgpu/allocator.h | 199 ++++++++++++--
 drivers/gpu/nvgpu/include/nvgpu/as.h        |  39 ++-
 drivers/gpu/nvgpu/include/nvgpu/dma.h       |  98 ++++---
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h      | 255 ++++++++++--------
 drivers/gpu/nvgpu/include/nvgpu/pd_cache.h  |  85 +++---
 7 files changed, 514 insertions(+), 218 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c
index a333bcf68..25976a48f 100644
--- a/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -206,6 +206,14 @@ static int nvgpu_bitmap_store_alloc(struct nvgpu_bitmap_allocator *a,
 /*
  * @len is in bytes. This routine will figure out the right number of bits to
  * actually allocate. The return is the address in bytes as well.
+ *
+ * This is a find-first-fit allocator.
+ * Check the input parameter validity.
+ * Acquire the alloc_lock.
+ * Search the bitmap for the first space that is large enough to satisfy the
+ * requested number of bits by walking the next available free blocks with
+ * bitmap_find_next_zero_area().
+ * Release the alloc_lock.
  */
 static u64 nvgpu_bitmap_balloc(struct nvgpu_allocator *na, u64 len)
 {
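The find-first-fit walk documented in the hunk above can be pictured with a small, self-contained sketch. This is an editorial illustration under assumed names, not the driver's code (the real routine uses the kernel's bitmap_find_next_zero_area() and the allocator's alloc_lock, which are elided here):

#include <stdint.h>

#define MAP_BITS 1024U

/* Return the index of the first run of @nr consecutive clear bits
 * (nr >= 1), or MAP_BITS if no run is large enough. This mirrors the
 * first-fit search nvgpu_bitmap_balloc() performs, minus locking and
 * bookkeeping. */
static unsigned int find_first_fit(const uint64_t *map, unsigned int nr)
{
	unsigned int start = 0U, run = 0U, bit;

	for (bit = 0U; bit < MAP_BITS; bit++) {
		int set = (int)((map[bit / 64U] >> (bit % 64U)) & 1U);

		if (set) {
			run = 0U;          /* run broken by an allocated bit */
			start = bit + 1U;  /* candidate start moves forward */
		} else if (++run == nr) {
			return start;      /* first fit found */
		}
	}
	return MAP_BITS; /* no space */
}

The byte address the real allocator hands back is then base + start * blk_size, and the bits [start, start + nr) are marked busy under the lock.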
diff --git a/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c
index e0233af70..636dc46c1 100644
--- a/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -287,6 +287,10 @@ cleanup:
 /*
  * Clean up and destroy the passed allocator.
+ * Walk the allocator for any pending allocations.
+ * Free up all pending allocations.
+ * Free any memory allocated at allocator init time.
+ * Destroy the lock and bzero the allocator completely.
  */
 static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *na)
 {
@@ -903,6 +907,14 @@ static void balloc_do_free_fixed(struct nvgpu_buddy_allocator *a,
 /*
  * Allocate memory from the passed allocator.
+ * Acquire the allocator lock.
+ * Compute the order by calling balloc_get_order().
+ * Compute the PTE size supported for this allocation by calling
+ * nvgpu_balloc_page_size_to_pte_size().
+ * If a buddy of the required size cannot be satisfied, call
+ * balloc_split_buddy() to get the required size by dividing a larger buddy.
+ * Free the remaining buddy to the respective list.
+ * Release the alloc_lock.
  */
 static u64 nvgpu_buddy_balloc_pte(struct nvgpu_allocator *na, u64 len,
				  u32 page_size)
@@ -959,7 +971,15 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len)
 }
 
 /*
- * Requires @na to be locked.
+ * Check the input parameter validity.
+ * Acquire the alloc_lock.
+ * Compute the order with respect to the input size.
+ * Compute the pte_size for the given page size and return an error for an
+ * invalid pte size.
+ * Call balloc_is_range_free() to check that the range at the given address
+ * is free.
+ * Call balloc_make_fixed_buddy() to generate the list of buddies.
+ * Do the bookkeeping of allocated objects in the respective lists.
+ * Release the alloc_lock.
  */
 static u64 nvgpu_balloc_fixed_buddy_locked(struct nvgpu_allocator *na,
					   u64 base, u64 len, u32 page_size)
@@ -1144,6 +1164,12 @@ static bool nvgpu_buddy_reserve_is_possible(struct nvgpu_buddy_allocator *a,
 /*
  * Carveouts can only be reserved before any regular allocations have been
  * made.
+ * - Check the validity of input parameters.
+ * - Acquire the allocator lock.
+ * - Call nvgpu_balloc_fixed_buddy_locked() to reserve the object
+ *   with \a co.base and \a co.length.
+ * - Add the allocated object to the bookkeeping list.
+ * - Release the allocator lock.
  */
 static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *na,
				  struct nvgpu_alloc_carveout *co)
@@ -1189,6 +1215,15 @@ done:
 /*
  * Carveouts can be release at any time.
+ * - Acquire the allocator lock.
+ * - Remove the carveout from the allocator list.
+ * - Call nvgpu_buddy_bfree_locked() to free the carveout.
+ *   - nvgpu_buddy_bfree_locked() first checks whether the address is a
+ *     fixed allocation by calling balloc_free_fixed().
+ *   - If the address is fixed, free it by calling balloc_do_free_fixed().
+ *   - Else free it through balloc_free_buddy().
+ * - Recompute the size of the allocator and coalesce the objects.
+ * - Release the lock.
  */
 static void nvgpu_buddy_release_co(struct nvgpu_allocator *na,
				   struct nvgpu_alloc_carveout *co)
@@ -1230,7 +1265,12 @@ static u64 nvgpu_buddy_alloc_end(struct nvgpu_allocator *a)
 	return ba->end;
 }
 
-
+/*
+ * - Acquire the allocator lock.
+ * - Check the availability of space between the start and end of
+ *   the allocator.
+ * - Release the allocator lock.
+ */
 static u64 nvgpu_buddy_alloc_space(struct nvgpu_allocator *a)
 {
 	struct nvgpu_buddy_allocator *ba = buddy_allocator(a);
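The split path described in the balloc comments above rests on standard buddy-system arithmetic: a block's buddy differs from it only in the bit corresponding to its order, so splits and merges are XOR operations. An editorial sketch of that math (illustrative names and block size, not the allocator's internals):

#include <stdint.h>

/* Smallest allocatable block; 4 KiB is a placeholder. */
#define BLK_SIZE 4096ULL

/* Order needed to hold @len bytes: smallest n with BLK_SIZE << n >= len. */
static unsigned int balloc_order_sketch(uint64_t len)
{
	unsigned int order = 0U;
	uint64_t size = BLK_SIZE;

	while (size < len) {
		size <<= 1;
		order++;
	}
	return order;
}

/* Address of the buddy of the block at @base with the given @order. */
static uint64_t buddy_addr_sketch(uint64_t base, unsigned int order)
{
	return base ^ (BLK_SIZE << order);
}

Splitting a buddy of order n yields the block at base and its buddy at buddy_addr_sketch(base, n - 1); freeing walks the same relation in reverse to coalesce.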
diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
index 26ed43103..6b8a11c11 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -399,10 +399,33 @@ static inline void alloc_unlock(struct nvgpu_allocator *a)
  * @param[in] blk_size	Block size of buddy allocator.
  * @param[in] max_order	Maximum allowed buddy order.
  * @param[in] flags	Flags indicating buddy allocator conditions.
+ *			Valid flags are
+ *			- GPU_ALLOC_GVA_SPACE
+ *			- GPU_ALLOC_NO_ALLOC_PAGE
+ *			- GPU_ALLOC_4K_VIDMEM_PAGES
+ *			- GPU_ALLOC_FORCE_CONTIG
+ *			- GPU_ALLOC_NO_SCATTER_GATHER
  *
- * @return 0 in case of success, < 0 otherwise.
- * @retval -EINVAL in case of incorrect input value.
- * @retval -ENOMEM in case there is not enough memory for allocation.
+ * Construct a buddy allocator in \a na. A buddy allocator manages memory by
+ * splitting all memory into "buddies" - pairs of adjacent blocks of memory.
+ * Each buddy can be further subdivided into buddies, again, allowing for
+ * arbitrary power-of-two sized blocks to be allocated.
+ *
+ * Call nvgpu_buddy_check_argument_limits() to check the validity of the
+ * inputs. This function verifies that the input arguments are valid for a
+ * buddy allocator. Specifically, the #blk_size of a buddy allocator must be
+ * a power-of-two, #max_order must be less than #GPU_BALLOC_MAX_ORDER,
+ * and the size must be non-zero.
+ * Call nvgpu_alloc_common_init() to initialize the basic operations like
+ * the allocation, free and query operations for the respective allocator.
+ * Initialize some lists and locks to maintain the allocator objects.
+ *
+ * @return 0 in case of success, < 0 otherwise.
+ * @retval -EINVAL in case of incorrect input value:
+ *         - \a length is zero.
+ *         - \a blk_size is not a power of two.
+ *         - \a base or \a length is not aligned with \a blk_size.
+ * @retval -ENOMEM in case there is not enough memory for allocation.
  */
 int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
			       struct vm_gk20a *vm, const char *name,
@@ -419,10 +442,26 @@ int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
  * @param[in] length	Size of bitmap allocator.
  * @param[in] blk_size	Block size of bitmap allocator.
  * @param[in] flags	Flags indicating bitmap allocator conditions.
+ *			Valid flags are
+ *			- GPU_ALLOC_GVA_SPACE
+ *			- GPU_ALLOC_NO_ALLOC_PAGE
+ *			- GPU_ALLOC_FORCE_CONTIG
+ *			- GPU_ALLOC_NO_SCATTER_GATHER
  *
- * @return 0 in case of success, < 0 otherwise.
- * @retval -EINVAL in case of incorrect input value.
- * @retval -ENOMEM in case there is not enough memory for allocation.
+ * Call nvgpu_bitmap_check_argument_limits() to check the validity of the
+ * input parameters. This function verifies that the input parameters are
+ * valid: specifically, the #blk_size of a bitmap allocator must be a
+ * power-of-two, and #base and #length must be aligned with #blk_size.
+ * Call nvgpu_alloc_common_init() to initialize the basic operations like
+ * the allocation, free and query operations for the respective allocator.
+ * Initialize some lists and locks to maintain the allocator objects.
+ *
+ * @return 0 in case of success, < 0 otherwise.
+ * @retval -EINVAL in case of incorrect input value:
+ *         - \a length is zero.
+ *         - \a blk_size is not a power of two.
+ *         - \a base or \a length is not aligned with \a blk_size.
+ * @retval -ENOMEM in case there is not enough memory for allocation.
  */
 int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
				const char *name, u64 base, u64 length,
@@ -439,11 +478,16 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
  * @param[in] base	Base address of page allocator.
  * @param[in] length	Size of page allocator.
  * @param[in] blk_size	Block size of page allocator.
- * @param[in] flags	Flags indicating page allocator conditions.
- *
- * @return 0 in case of success, < 0 otherwise.
- * @retval -EINVAL in case of incorrect input value.
- * @retval -ENOMEM in case there is not enough memory for allocation.
+ * @param[in] flags	Flags indicating page allocator conditions. Valid
+ *			flags are
+ *			- GPU_ALLOC_GVA_SPACE
+ *			- GPU_ALLOC_NO_ALLOC_PAGE
+ *			- GPU_ALLOC_4K_VIDMEM_PAGES
+ *			- GPU_ALLOC_FORCE_CONTIG
+ *			- GPU_ALLOC_NO_SCATTER_GATHER
+ * @return 0 in case of success, < 0 otherwise.
+ * @retval -EINVAL in case of incorrect input value.
+ * @retval -ENOMEM in case there is not enough memory for allocation.
  */
 int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
			      const char *name, u64 base, u64 length,
@@ -464,15 +508,53 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
  * @param[in] max_order	Max order of resource slices that can be
  *			allocated. Applicable to buddy allocator only.
  * @param[in] flags	Flags indicating additional conditions.
- * @param[in] alloc_type Allocator type.
-
- * Returns 0 in case of success, < 0 otherwise.
+ *			Valid flags are
+ *			- GPU_ALLOC_GVA_SPACE
+ *			- GPU_ALLOC_NO_ALLOC_PAGE
+ *			- GPU_ALLOC_4K_VIDMEM_PAGES
+ *			- GPU_ALLOC_FORCE_CONTIG
+ *			- GPU_ALLOC_NO_SCATTER_GATHER
+ * @param[in] alloc_type Allocator type. Valid types are
+ *			- BUDDY_ALLOCATOR
+ *			- PAGE_ALLOCATOR
+ *			- BITMAP_ALLOCATOR
+ *
+ * Call *allocator_init() to initialize the respective allocator.
+ *
+ * @return 0 in case of success, < 0 otherwise.
+ * @retval -EINVAL if the allocator type is not valid.
+ * @retval -EINVAL in case of incorrect input value:
+ *         - \a length is zero.
+ *         - \a blk_size is not a power of two.
+ *         - \a base or \a length is not aligned with \a blk_size.
+ * @retval -ENOMEM in case there is not enough memory for allocation.
  */
 int nvgpu_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
			 struct vm_gk20a *vm, const char *name,
			 u64 base, u64 length, u64 blk_size, u64 max_order,
			 u64 flags, enum nvgpu_allocator_type alloc_type);
 
+#ifdef CONFIG_NVGPU_FENCE
+/**
+ * @brief Initialize lockless allocator.
+ *
+ * @param[in] g		Pointer to GPU structure.
+ * @param[in] na	Pointer to allocator structure.
+ * @param[in] name	Name of lockless allocator.
+ * @param[in] base	Base address of lockless allocator.
+ * @param[in] length	Size of lockless allocator.
+ * @param[in] blk_size	Block size of lockless allocator.
+ * @param[in] flags	Flags indicating lockless allocator conditions.
+ *
+ * @return 0 in case of success, < 0 otherwise.
+ * @retval -EINVAL in case of incorrect input value.
+ * @retval -ENOMEM in case there is not enough memory for allocation.
+ */
+int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
				  const char *name, u64 base, u64 length,
				  u64 blk_size, u64 flags);
+#endif
+
 /**
  * Largest block of resources that fits in address space.
  */
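As a usage sketch for nvgpu_allocator_init() as declared above (editorial illustration; the base/length/blk_size values and the max_order choice are placeholders, not values taken from the driver):

#include <nvgpu/allocator.h>	/* assumed header path */

/* Editorial sketch: initialize a buddy-backed GVA allocator. */
static int init_gva_allocator_sketch(struct gk20a *g, struct vm_gk20a *vm,
				     struct nvgpu_allocator *na)
{
	return nvgpu_allocator_init(g, na, vm, "gva_space",
				    0x100000ULL,	/* base, placeholder */
				    0x40000000ULL,	/* length, 1 GiB */
				    0x10000ULL,		/* blk_size, 64 KiB */
				    0ULL,		/* max_order: 0 assumed to select the default */
				    GPU_ALLOC_GVA_SPACE,
				    BUDDY_ALLOCATOR);
}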
@@ -484,7 +566,16 @@ int nvgpu_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
  * @param[in] a	Pointer to nvgpu allocator.
  * @param[in] len	Size of allocation.
  *
- * @return Address of allocation in case of success, 0 otherwise.
+ * Invoke the underlying allocator's implementation of the alloc
+ * operation.
+ *
+ * @return Address of allocation in case of success, 0 otherwise.
+ * @retval 0 For failure; the reason can be one of the following:
+ *         - input parameters are not valid.
+ *         - there is no free space available in the allocator.
+ *         - zalloc failure due to out-of-memory conditions.
  */
 u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len);
 
@@ -495,7 +586,14 @@ u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len);
  * @param[in] len	Size of allocation.
  * @param[in] page_size	Page size of resource.
  *
- * @return Address of allocation in case of success, 0 otherwise.
+ * Invoke the underlying allocator's implementation of the alloc_pte
+ * operation.
+ *
+ * @return Address of allocation in case of success, 0 otherwise.
+ * @retval 0 For failure; the reason can be one of the following:
+ *         - input parameters are not valid.
+ *         - there is no free space available in the allocator.
  */
 u64 nvgpu_alloc_pte(struct nvgpu_allocator *a, u64 len, u32 page_size);
 
@@ -504,6 +602,11 @@ u64 nvgpu_alloc_pte(struct nvgpu_allocator *a, u64 len, u32 page_size);
  *
  * @param[in] a	Pointer to nvgpu allocator.
  * @param[in] addr	Base address of allocation.
+ *
+ * Invoke the underlying allocator's implementation of the free
+ * operation.
+ *
+ * @return None.
  */
 void nvgpu_free(struct nvgpu_allocator *a, u64 addr);
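A short usage sketch of the nvgpu_alloc()/nvgpu_free() pair declared above (editorial; the length is a placeholder and error handling is minimal):

/* Editorial sketch: allocate 64 KiB from an initialized allocator and
 * release it again. nvgpu_alloc() reports every failure as 0. */
static int alloc_free_roundtrip_sketch(struct nvgpu_allocator *na)
{
	u64 addr = nvgpu_alloc(na, 0x10000ULL);

	if (addr == 0ULL)
		return -ENOMEM;

	/* ... use the range [addr, addr + 0x10000) ... */

	nvgpu_free(na, addr);
	return 0;
}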
@@ -515,7 +618,13 @@ void nvgpu_free(struct nvgpu_allocator *a, u64 addr);
  * @param[in] len	Size of allocation.
  * @param[in] page_size	Page size of resource.
  *
- * @return Address of allocation in case of success, 0 otherwise.
+ * Invoke the underlying allocator's implementation of the alloc_fixed
+ * operation.
+ *
+ * @return Address of allocation in case of success.
+ * @retval 0 For failure, for any of the reasons below:
+ *         - invalid inputs.
+ *         - space unavailable to satisfy the request.
  */
 u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len,
		      u32 page_size);
@@ -526,6 +635,11 @@ u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len,
  * @param[in] a	Pointer to nvgpu allocator.
  * @param[in] base	Start address of resource.
  * @param[in] len	Size of allocation.
+ *
+ * Invoke the underlying allocator's implementation of the free_fixed
+ * operation.
+ *
+ * @return None.
  */
 void nvgpu_free_fixed(struct nvgpu_allocator *a, u64 base, u64 len);
 
@@ -535,7 +649,15 @@ void nvgpu_free_fixed(struct nvgpu_allocator *a, u64 base, u64 len);
  * @param[in] a	Pointer to nvgpu allocator.
  * @param[in] co	Pointer to carveout structure.
  *
- * @return 0 in case of success, < 0 in case of failure.
+ * Invoke the underlying allocator's implementation of the
+ * alloc_reserve_carveout operation.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ * @retval -EINVAL For invalid input parameters.
+ * @retval -EBUSY For the unavailability of the base of the
+ *         carveout.
+ * @retval -ENOMEM For the unavailability of memory for the carveout
+ *         object.
  */
 int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
				 struct nvgpu_alloc_carveout *co);
@@ -545,6 +667,11 @@ int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
  *
  * @param a	Pointer to nvgpu allocator.
  * @param co	Pointer to carveout structure.
+ *
+ * Invoke the underlying allocator's implementation of the release_carveout
+ * operation.
+ *
+ * @return None.
  */
 void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a,
				  struct nvgpu_alloc_carveout *co);
@@ -554,7 +681,9 @@ void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a,
  *
  * @param[in] a	Pointer to nvgpu allocator.
  *
- * @return Allocator start address.
+ * Invoke the underlying allocator's implementation of the base operation.
+ *
+ * @return Allocator start address.
  */
 u64 nvgpu_alloc_base(struct nvgpu_allocator *a);
 
@@ -563,7 +692,10 @@ u64 nvgpu_alloc_base(struct nvgpu_allocator *a);
  *
  * @param a	Pointer to nvgpu allocator.
  *
- * @return Allocator length address.
+ * Invoke the underlying allocator's implementation of the length
+ * operation.
+ *
+ * @return Allocator length.
  */
 u64 nvgpu_alloc_length(struct nvgpu_allocator *a);
 
@@ -572,7 +704,9 @@ u64 nvgpu_alloc_length(struct nvgpu_allocator *a);
  *
  * @param[in] a	Pointer to nvgpu allocator.
  *
- * @return Allocator end address.
+ * Invoke the underlying allocator's implementation of the end operation.
+ *
+ * @return Allocator end address.
  */
 u64 nvgpu_alloc_end(struct nvgpu_allocator *a);
 
@@ -581,7 +715,8 @@ u64 nvgpu_alloc_end(struct nvgpu_allocator *a);
  *
  * @param a	Pointer to nvgpu allocator.
  *
- * @return True if allocator is initialized, false otherwise.
+ *
+ * @return True if allocator is initialized, false otherwise.
  */
 bool nvgpu_alloc_initialized(struct nvgpu_allocator *a);
 
@@ -590,7 +725,11 @@ bool nvgpu_alloc_initialized(struct nvgpu_allocator *a);
  *
  * @param[in] a	Pointer to nvgpu allocator.
  *
- * @return Available allocator space.
+ * Invoke the underlying allocator's implementation of the space
+ * operation.
+ *
+ * @return Available allocator space.
+ *
  */
 u64 nvgpu_alloc_space(struct nvgpu_allocator *a);
 
@@ -598,6 +737,12 @@ u64 nvgpu_alloc_space(struct nvgpu_allocator *a);
  * @brief Interface to destroy allocator.
  *
  * @param[in] a	Pointer to nvgpu allocator.
+ *
+ * Invoke the underlying allocator's implementation of the destroy
+ * operation.
+ *
+ * @return None.
+ *
  */
 void nvgpu_alloc_destroy(struct nvgpu_allocator *a);
 
@@ -619,7 +764,7 @@ void nvgpu_alloc_print_stats(struct nvgpu_allocator *a,
  *
  * @param[in] a	Pointer to nvgpu allocator.
  *
- * @return GPU pointer.
+ * @return GPU pointer.
  */
 static inline struct gk20a *nvgpu_alloc_to_gpu(struct nvgpu_allocator *a)
 {
@@ -656,7 +801,9 @@ void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a);
  * @param[in] dbg	Debug flag.
  * @param[in] ops	Pointer to allocator operations.
  *
- * @return 0 in case of success, < 0 in case of failure.
+ * @return 0 in case of success, < 0 in case of failure.
+ * @retval -EINVAL If any of the inputs is NULL.
+ *
  */
 int nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
			    const char *name, void *priv, bool dbg,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/as.h b/drivers/gpu/nvgpu/include/nvgpu/as.h
index 17580f1ab..34ceba107 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/as.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/as.h
@@ -1,7 +1,7 @@
 /*
  * GK20A Address Spaces
  *
- * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -74,10 +74,18 @@ struct gk20a_as_share {
  *
  * @param as_share [in]	The address space share to release.
  *
- * Call gk20a_vm_release_share on the provided \a as_share and release the
- * corresponding share id.
+ * Release the address space share \a as_share that was created
+ * by gk20a_as_alloc_share().
+ *
+ * @return EOK in case of success, < 0 in case of failure.
+ *
+ * @retval -ENODEV If struct gk20a \a g is NULL.
+ * @retval -EINVAL If the power context associated with struct nvgpu_os_rmos
+ *         is NULL.
+ * @retval -EINVAL If the power function pointer associated with struct
+ *         nvgpu_module is NULL.
+ * @retval -EIO For clock-setting related failures.
  *
- * @return 0 in case of success, < 0 in case of failure.
  */
 int gk20a_as_release_share(struct gk20a_as_share *as_share);
 
@@ -98,8 +106,10 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share);
  *
  * @param g [in]		The GPU
  * @param big_page_size [in]	Big page size to use for the VM,
- *				set 0 for no big pages
- * @param flags [in]		NVGPU_AS_ALLOC_* flags
+ *				set 0 for the 64K big page size.
+ * @param flags [in]		NVGPU_AS_ALLOC_* flags. The flags are
+ *				NVGPU_AS_ALLOC_USERSPACE_MANAGED and
+ *				NVGPU_AS_ALLOC_UNIFIED_VA.
  * @param va_range_start [in]	Requested user managed memory start
  *				address, used to map buffers, save data
  *				should be aligned by PDE
@@ -112,11 +122,22 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share);
  *				structure
  *
  * Allocate the gk20a_as_share structure and the VM associated with it, based
- * on the provided \a big_page_size and NVGPU_AS_ALLOC_* \a flags.
+ * on the provided \a big_page_size and NVGPU_AS_ALLOC_* \a flags.
+ * Check the validity of \a big_page_size: it must be a power of two and
+ * within the range of big page sizes supported by the GPU.
  *
- * Notes: if \a big_page_size == 0, the default big page size is used.
+ * @note If \a big_page_size == 0, the default big page size (64K) is used.
+ * @note The \a flags is always set as NVGPU_AS_ALLOC_USERSPACE_MANAGED (AS
+ *       allocation flag for userspace managed).
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ *
+ * @retval -ENODEV If the GPU structure is NULL.
+ * @retval -EIO For clock-setting related failures.
+ * @retval -ENOMEM For memory allocation failures.
+ * @retval -EINVAL For any parameter check failures from
+ *         gk20a_vm_alloc_share().
+ * @retval -ENOMEM If the allocated VM is NULL.
  *
- * @return 0 in case of success, < 0 in case of failure.
  */
 int gk20a_as_alloc_share(struct gk20a *g, u32 big_page_size,
			 u32 flags, u64 va_range_start,
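The power-of-two check described for \a big_page_size above is simple bit math. An editorial sketch of such a validation (the set of supported sizes is a placeholder, not what the hardware actually reports):

#include <stdbool.h>
#include <stdint.h>

/* Editorial sketch: validate a big page size as the description above
 * requires - zero (default), or a power of two that the GPU supports.
 * Powers of two can be OR'd together, so the supported set is a mask. */
static bool big_page_size_valid_sketch(uint32_t big_page_size)
{
	const uint32_t supported = (64U << 10) | (128U << 10); /* placeholder */

	if (big_page_size == 0U)
		return true; /* 0 selects the default size */

	if ((big_page_size & (big_page_size - 1U)) != 0U)
		return false; /* not a power of two */

	return (big_page_size & supported) != 0U;
}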
diff --git a/drivers/gpu/nvgpu/include/nvgpu/dma.h b/drivers/gpu/nvgpu/include/nvgpu/dma.h
index d75efccd0..b74832b80 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/dma.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/dma.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -87,7 +87,18 @@ bool nvgpu_iommuable(struct gk20a *g);
  * Allocate memory suitable for doing DMA. Store the allocation info in #mem.
  * Returns 0 on success and a suitable error code when there's an error. This
  * memory can be either placed in VIDMEM or SYSMEM, which ever is more
- * convenient for the driver.
+ * convenient for the driver. The final memory allocation is done by an OS
+ * specific allocation routine.
+ *
+ * @note Since the Linux driver runs in kernel space, it uses the
+ *       dma_alloc_attrs() Linux API to allocate DMA-able memory.
+ *       QNX uses the memory service module to achieve the same.
+ * @note The parameter flags is always zero for nvgpu_dma_alloc_flags().
+ *
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
+ *
  */
 int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
 
@@ -96,6 +107,9 @@ int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  *
  * @param g     - The GPU.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                The following flags are accepted:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
+ *                - %NVGPU_DMA_READ_ONLY
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
@@ -104,10 +118,9 @@ int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  * memory can be either placed in VIDMEM or SYSMEM, which ever is more
  * convenient for the driver.
  *
- * The following flags are accepted:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- * %NVGPU_DMA_READ_ONLY
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
  */
 int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
			  struct nvgpu_mem *mem);
@@ -122,6 +135,10 @@ int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
  * Allocate memory suitable for doing DMA. Store the allocation info in #mem.
  * Returns 0 on success and a suitable error code when there's an error. This
  * allocates memory specifically in SYSMEM.
+ *
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
  */
 int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
 
@@ -130,6 +147,9 @@ int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  *
  * @param g     - The GPU.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                The following flags are accepted:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
+ *                - %NVGPU_DMA_READ_ONLY
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
@@ -137,10 +157,9 @@ int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  * Returns 0 on success and a suitable error code when there's an error. This
  * allocates memory specifically in SYSMEM.
  *
- * The following flags are accepted:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- * %NVGPU_DMA_READ_ONLY
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
  */
 int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
			      size_t size, struct nvgpu_mem *mem);
@@ -164,6 +183,7 @@ int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  *
  * @param g     - The GPU.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING is the only accepted flag.
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
@@ -171,10 +191,6 @@ int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
  * Returns 0 on success and a suitable error code when there's an error. This
  * allocates memory specifically in VIDMEM.
  *
- * Only the following flags are accepted:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- *
  */
 int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags,
			      size_t size, struct nvgpu_mem *mem);
@@ -202,6 +218,8 @@ int nvgpu_dma_alloc_vid_at(struct gk20a *g,
  *
  * @param g     - The GPU.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                Only the following flags are accepted:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  * @param at    - A specific location to attempt to allocate memory from or 0 if
 *                the caller does not care what the address is.
 *
@@ -211,9 +229,6 @@ int nvgpu_dma_alloc_vid_at(struct gk20a *g,
  * Returns 0 on success and a suitable error code when there's an error. This
  * allocates memory specifically in VIDMEM.
  *
- * Only the following flags are accepted:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
  */
 int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
				 size_t size, struct nvgpu_mem *mem, u64 at);
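A hedged usage sketch for the flagged SYSMEM allocation path documented above (editorial; the size is a placeholder and error handling is trimmed):

#include <nvgpu/dma.h>	/* assumed header path */

/* Editorial sketch: allocate sysmem without a CPU kernel mapping,
 * then free it again with nvgpu_dma_free(). */
static int dma_alloc_sketch(struct gk20a *g)
{
	struct nvgpu_mem mem = { 0 };
	int err;

	err = nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_NO_KERNEL_MAPPING,
					0x10000U, &mem);
	if (err != 0)
		return err;

	/* ... hand mem to the GMMU or hardware ... */

	nvgpu_dma_free(g, &mem);
	return 0;
}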
@@ -234,6 +249,8 @@ int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
  * nvgpu_dma_alloc_vid()
  * nvgpu_dma_alloc_flags_vid()
  * nvgpu_dma_alloc_flags_vid_at()
+ *
+ * @return None.
  */
 void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem);
 
@@ -253,6 +270,11 @@ void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem);
  * cannot use nvgpu_gmmu_map() on said buffer - it will overwrite the necessary
  * information for the DMA unmap routines to actually unmap the buffer. You
  * will either leak mappings or see GMMU faults.
+ *
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
+ * @retval -ENOMEM For failure in GPU mapping.
  */
 int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
			struct nvgpu_mem *mem);
@@ -262,6 +284,9 @@ int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
  *
  * @param vm    - VM context for GMMU mapping.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                Accepted flags are:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
+ *                - %NVGPU_DMA_READ_ONLY
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
@@ -270,11 +295,10 @@ int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
  * either placed in VIDMEM or SYSMEM, which ever is more convenient for the
  * driver.
  *
- * This version passes #flags on to the underlying DMA allocation. The accepted
- * flags are:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- * %NVGPU_DMA_READ_ONLY
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
+ * @retval -ENOMEM For failure in GPU mapping.
  */
 int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
			      size_t size, struct nvgpu_mem *mem);
@@ -288,6 +312,11 @@ int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
  *
  * Allocate memory suitable for doing DMA and map that memory into the GMMU.
  * This memory will be placed in SYSMEM.
+ *
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
+ * @retval -ENOMEM For failure in GPU mapping.
  */
 int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size,
			    struct nvgpu_mem *mem);
@@ -297,17 +326,19 @@ int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size,
  *
  * @param vm    - VM context for GMMU mapping.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                Accepted flags are:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
+ *                - %NVGPU_DMA_READ_ONLY
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
  * Allocate memory suitable for doing DMA and map that memory into the GMMU.
  * This memory will be placed in SYSMEM.
  *
- * This version passes #flags on to the underlying DMA allocation. The accepted
- * flags are:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- * %NVGPU_DMA_READ_ONLY
+ * @return 0 For success, < 0 for failure.
+ * @retval -ENOMEM For failure in handle creation or mapping operation.
+ * @retval -EINVAL For failure in handle query.
+ * @retval -ENOMEM For failure in GPU mapping.
  */
 int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
				  size_t size, struct nvgpu_mem *mem);
@@ -331,24 +362,22 @@ int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size,
  *
  * @param vm    - VM context for GMMU mapping.
  * @param flags - Flags modifying the operation of the DMA allocation.
+ *                Accepted flags are:
+ *                - %NVGPU_DMA_NO_KERNEL_MAPPING
+ *                - %NVGPU_DMA_READ_ONLY
  * @param size  - Size of the allocation in bytes.
  * @param mem   - Struct for storing the allocation information.
  *
  * Allocate memory suitable for doing DMA and map that memory into the GMMU.
  * This memory will be placed in VIDMEM.
  *
- * This version passes #flags on to the underlying DMA allocation. The accepted
- * flags are:
- *
- * %NVGPU_DMA_NO_KERNEL_MAPPING
- * %NVGPU_DMA_READ_ONLY
 */
 int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
				  size_t size, struct nvgpu_mem *mem);
 #endif /* CONFIG_NVGPU_DGPU */
 
 /**
- * nvgpu_dma_unmap_free - Free a DMA allocation
+ * nvgpu_dma_unmap_free - Free a DMA allocation and unmap the GPU VA.
  *
  * @param g   - The GPU.
  * @param mem - An allocation to free.
@@ -361,6 +390,9 @@ int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
  * nvgpu_dma_alloc_map_flags_sys()
  * nvgpu_dma_alloc_map_vid()
  * nvgpu_dma_alloc_map_flags_vid()
+ *
+ * @return None.
+ *
  */
 void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem);
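The alloc-map routines above pair with nvgpu_dma_unmap_free(); a minimal editorial sketch of the round trip (placeholder size, and the gpu_va field of struct nvgpu_mem is an assumption about where the mapping is recorded):

/* Editorial sketch: allocate + GMMU-map in one step, then undo both.
 * Per the warning in the docs above, nvgpu_gmmu_map() must not be used
 * on this buffer - it would clobber the unmap bookkeeping. */
static int alloc_map_roundtrip_sketch(struct vm_gk20a *vm)
{
	struct nvgpu_mem mem = { 0 };
	int err = nvgpu_dma_alloc_map(vm, 0x1000U, &mem);

	if (err != 0)
		return err;

	/* ... program mem.gpu_va (assumed field) into hardware ... */

	nvgpu_dma_unmap_free(vm, &mem);
	return 0;
}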
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 670509ddb..5d08de122 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -229,11 +229,11 @@ static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
  * @param vm [in]	Pointer to virtual memory structure.
  *
  * Init Page Table:
- * - Allocates the DMA memory for a page directory.
- *   This handles the necessary PD cache logistics. Since on Parker and
- *   later GPUs some of the page directories are smaller than a page packing
- *   these PDs together saves a lot of memory.
- *   #nvgpu_pd_alloc() does the pd cache allocation.
+ * Allocates the DMA memory for a page directory.
+ * This handles the necessary PD cache logistics. Since on Parker and
+ * later GPUs some of the page directories are smaller than a page, packing
+ * these PDs together saves a lot of memory.
+ * #nvgpu_pd_alloc() does the pd cache allocation.
  *
  * PDB size here must be at least 4096 bytes so that its address is 4K
  * aligned. Although lower PDE tables can be aligned at 256B boundaries
@@ -242,47 +242,55 @@ static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
  * Currently NVGPU_CPU_PAGE_SIZE is used, even when 64K, to work around an issue
  * with the PDB TLB invalidate code not being pd_cache aware yet.
  *
- * @return 0 in case of success.
- *         -ENOMEM (< 0) in case of failure.
+ * @return 0 in case of success.
+ * @retval -ENOMEM For any allocation failures from kzalloc and dma_alloc
+ *         functions.
  */
 int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm);
 
 /**
- * @brief Map memory into the GMMU.
+ * @brief Map memory into the GMMU. This is required to allow a particular
+ *        context on the GR or CE engines to access the given virtual
+ *        address.
  *
  * @param vm [in]	Pointer to virtual memory structure.
- * @param mem [in]	Structure for storing the memory informati on.
+ * @param mem [in]	Structure for storing the memory information.
  * @param size [in]	Size of the buffer in bytes.
  * @param flags [in]	Mapping flags.
  *			- Min: NVGPU_VM_MAP_FIXED_OFFSET
  *			- Max: NVGPU_VM_MAP_PLATFORM_ATOMIC
  * @param rw_flag [in]	Flag designates the requested GMMU mapping.
+ *			- Min: gk20a_mem_flag_none
+ *			- Max: gk20a_mem_flag_write_only
  * @param priv [in]	True if the mapping should be Privileged.
  * @param aperture [in]	Where the memory actually was allocated from.
+ *			- Min: APERTURE_SYSMEM
+ *			- Max: APERTURE_VIDMEM
  *
  * Core GMMU map function for the nvgpu to use. The GPU VA will be
  * allocated for client.
  *
  * GMMU Map:
- * - Retrives the nvgpu_sgt which contains the memory handle information.
- * - Acquires the VM GMMU lock to the avoid race.
- * - Decodes the Mapping flags, rw_flag, priv and aperture for GMMU mapping.
- * - Allocates a new GPU VA range for a specific size.
- *   #nvgpu_vm_alloc_va() reserves the GPU VA.
- * - Program PDE and PTE entry with PA/IPA, mapping flags, rw_flag and aperture
- *   information. #nvgpu_gmmu_update_page_table does the pde and pte updates.
- * - Chip specific stuff is handled at the PTE/PDE programming HAL layer.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b, gp10b).
- * - Invalidates the GPU TLB, gm20b_fb_tlb_invalidate does the tlb invalidate.
- * - Release the VM GMMU lock.
+ * Retrieves the nvgpu_sgt which contains the memory handle information.
+ * Acquires the VM GMMU lock to avoid races.
+ * Decodes the mapping flags, rw_flag, priv and aperture for the GMMU mapping.
+ * Allocates a new GPU VA range for a specific size. #nvgpu_vm_alloc_va()
+ * reserves the GPU VA.
+ * Programs the PDE and PTE entries with PA/IPA, mapping flags, rw_flag and
+ * aperture information. #nvgpu_gmmu_update_page_table() does the pde and pte
+ * updates.
+ * Chip specific stuff is handled at the PTE/PDE programming HAL layer. GMMU
+ * level entry format will be different for each GPU family (i.e., gv11b,
+ * gp10b).
+ * Internally nvgpu_set_pd_level() programs the different levels of the page
+ * table.
+ * Invalidates the GPU TLB; gm20b_fb_tlb_invalidate does the tlb invalidate.
+ * Releases the VM GMMU lock.
+ *
+ * @return valid GMMU VA start address in case of success.
+ * @retval 0 in case of all possible failures.
+ *         Possible failure cases:
+ *         - Memory handle is invalid.
+ *         - No free GPU VA space (GPU VA space full).
+ *         - TLB invalidate timeout.
+ *         - Invalid inputs.
  *
- * @return valid GMMU VA start address in case of success.
- *         0 in case of all possible failures.
- *         Possible Failure cases:
- *         - Memory handle is invalid.
- *         - No free GPU VA space (GPU VA space full).
- *         - TLB invalidate timeout.
  */
 u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
		   struct nvgpu_mem *mem,
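A usage sketch of nvgpu_gmmu_map() as documented above. The full parameter list is abbreviated in this patch, so treat the exact call shape (and the unmap signature) as assumptions:

/* Editorial sketch: map a sysmem buffer with default (non-fixed) VA
 * placement, then unmap it. Flag values come from the doc above. */
static u64 gmmu_map_sketch(struct vm_gk20a *vm, struct nvgpu_mem *mem,
			   u64 size)
{
	u64 gpu_va = nvgpu_gmmu_map(vm, mem, size,
				    0U,			/* no special map flags */
				    gk20a_mem_flag_none,	/* read-write */
				    false,		/* not privileged */
				    APERTURE_SYSMEM);

	if (gpu_va == 0ULL)
		return 0ULL; /* every failure reports as 0 */

	/* ... use gpu_va ... */

	nvgpu_gmmu_unmap(vm, mem, gpu_va);
	return gpu_va;
}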
@@ -293,7 +301,9 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
 /**
- * @brief Map memory into the GMMU at a fixed address.
+ * @brief Map memory into the GMMU at a fixed address. This is required to
+ *        allow a particular context on the GR or CE engines to access the
+ *        given virtual address.
  *
  * @param vm [in]	Pointer to virtual memory structure.
  * @param mem [in]	Structure for storing the memory information.
  * @param size [in]	Size of the buffer in bytes.
  * @param gpu_va [in]	GPU virtual address.
@@ -303,28 +313,33 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
  * @param flags [in]	Mapping flags.
  *			- Min: NVGPU_VM_MAP_FIXED_OFFSET
  *			- Max: NVGPU_VM_MAP_PLATFORM_ATOMIC
  * @param rw_flag [in]	Flag designates the requested GMMU mapping.
+ *			- Min: gk20a_mem_flag_none
+ *			- Max: gk20a_mem_flag_write_only
  * @param priv [in]	True if the mapping should be Privileged.
  * @param aperture [in]	Where the memory actually was allocated from.
- *
+ *			- Min: APERTURE_SYSMEM
+ *			- Max: APERTURE_VIDMEM
  *
  * GMMU Map at a fixed address:
- * - Retrives the nvgpu_sgt which contains the memory handle information.
- * - Acquires the VM GMMU lock to the avoid race.
- * - Decodes the Mapping flags, rw_flag, priv and aperture for GMMU mapping.
- * - Program PDE and PTE entry with PA/IPA, mapping flags, rw_flag and aperture
- *   information. #nvgpu_gmmu_update_page_table does the pde and pte updates.
- * - Chip specific stuff is handled at the PTE/PDE programming HAL layer.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b, gp10b).
- * - Invalidates the GPU TLB, gm20b_fb_tlb_invalidate does the tlb invalidate.
- * - Release the VM GMMU lock.
+ * Retrieves the nvgpu_sgt which contains the memory handle information.
+ * Acquires the VM GMMU lock to avoid races.
+ * Decodes the mapping flags, rw_flag, priv and aperture for the GMMU mapping.
+ * Programs the PDE and PTE entries with PA/IPA, mapping flags, rw_flag and
+ * aperture information. #nvgpu_gmmu_update_page_table does the pde and pte
+ * updates.
+ * Chip specific stuff is handled at the PTE/PDE programming HAL layer.
+ * GMMU level entry format will be different for each GPU family (i.e.,
+ * gv11b, gp10b).
+ * Internally nvgpu_set_pd_level() is called to program the different levels
+ * of the page table.
+ * Invalidates the GPU TLB; gm20b_fb_tlb_invalidate does the tlb invalidate.
+ * Releases the VM GMMU lock.
  *
- * @return valid GMMU VA start address in case of success.
- *         0 in case of all possible failures.
- *         Possible Failure cases:
- *         - Memory handle is invalid.
- *         - No free GPU VA space at @addr passed by client.
- *         - TLB invalidate timeout.
+ * @return valid GMMU VA start address in case of success.
+ * @retval 0 in case of all possible failures.
+ *         Possible failure cases:
+ *         - Memory handle is invalid.
+ *         - No free GPU VA space at @addr passed by client.
+ *         - TLB invalidate timeout.
+ *         - Invalid inputs.
  */
 u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
			 struct nvgpu_mem *mem,
@@ -337,6 +352,7 @@ u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
 /**
  * @brief Unmap a memory mapped by nvgpu_gmmu_map()/nvgpu_gmmu_map_fixed().
+ *        This is required to remove the translations from the GPU page table.
  *
  * @param vm [in]	Pointer to virtual memory structure.
  * @param mem [in]	Structure for storing the memory information.
@@ -345,19 +361,19 @@ u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
  * Core GMMU unmap function for the nvgpu to use.
  *
  * GMMU Unmap:
- * - Acquires the VM GMMU lock to the avoid race.
- * - Free the reserved GPU VA space staring at @gpu_va.
- *   #nvgpu_vm_free_va does free the GPU VA space.
- * - Program PDE and PTE entry with default information which is internally
- *   frees up the GPU VA space.
- * - Chip specific stuff is handled at the PTE/PDE programming HAL layer.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b).
- * - Flush the GPU L2. gv11b_mm_l2_flush does the L2 flush.
- * - Invalidates the GPU TLB, gm20b_fb_tlb_invalidate does the tlb invalidate.
- * - Release the VM GMMU lock.
+ * Acquires the VM GMMU lock to avoid races.
+ * Frees the reserved GPU VA space starting at @gpu_va.
+ * #nvgpu_vm_free_va does free the GPU VA space.
+ * Programs the PDE and PTE entries with default information, which
+ * internally frees up the GPU VA space.
+ * Chip specific stuff is handled at the PTE/PDE programming HAL layer.
+ * GMMU level entry format will be different for each GPU family
+ * (i.e., gv11b).
+ * Flushes the GPU L2. gv11b_mm_l2_flush does the L2 flush.
+ * Invalidates the GPU TLB; gm20b_fb_tlb_invalidate does the tlb invalidate.
+ * Releases the VM GMMU lock.
  *
- * @return None.
+ * @return None.
  */
 void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
		      struct nvgpu_mem *mem,
@@ -369,14 +385,13 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
  * @param g [in]	The GPU.
  *
  * Compute number of words in a PTE:
- * - Iterate to the PTE level. The levels array is always NULL terminated.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b).
+ * Iterate to the PTE level. The levels array is always NULL terminated.
+ * GMMU level entry format will be different for each GPU family
+ * (i.e., gv11b).
  *
  * This computes and returns the size of a PTE for the passed chip.
  *
- * @return number of words in a PTE in case of success.
- *         0 in case failure.
+ * @return number of words in a PTE in case of success.
  */
 u32 nvgpu_pte_words(struct gk20a *g);
 
@@ -389,15 +404,16 @@ u32 nvgpu_pte_words(struct gk20a *g);
  * @param pte [out]	Set to the contents of the PTE.
  *
  * Get the contents of a PTE:
- * - Find a PTE in the passed VM based on the passed GPU virtual address. This
- *   will @pte with a copy of the contents of the PTE. @pte must be an array of
- *   u32s large enough to contain the PTE. This can be computed using
- *   nvgpu_pte_words().
+ * Find a PTE in the passed VM based on the passed GPU virtual address. This
+ * will fill @pte with a copy of the contents of the PTE. @pte must be an
+ * array of u32s large enough to contain the PTE. The required size can be
+ * computed using nvgpu_pte_words().
  *
  * If you wish to write to this PTE then you may modify @pte and then use the
  * nvgpu_set_pte().
  *
- * @return 0 if the PTE is found and -EINVAL otherwise.
+ * @return 0 if the PTE is found.
+ * @retval -EINVAL If any of the internal computations fail.
  */
 int nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
 
@@ -410,17 +426,18 @@ int nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
  * @param pte [in]	The contents of the PTE to write.
  *
  * Set the contents of a PTE:
- * - Find a PTE and overwrite the contents of that PTE with the passed in data
- *   located in @pte. If the PTE does not exist then no writing will happen.
- *   That is this function will not fill out the page tables for you.
- *   The expectation is that the passed @vaddr has already been mapped and
- *   this is just modifying the mapping
- *   (for instance changing invalid to valid).
+ * Find a PTE and overwrite the contents of that PTE with the passed in data
+ * located in @pte by calling nvgpu_locate_pte(). If the PTE does not exist
+ * then no writing will happen. That is, this function will not fill out the
+ * page tables for you. The expectation is that the passed @vaddr has already
+ * been mapped and this is just modifying the mapping (for instance changing
+ * invalid to valid).
  *
  * @pte must contain at least the required words for the PTE. See
  * nvgpu_pte_words().
  *
- * @return 0 on success and -EINVAL otherwise.
+ * @return 0 on success.
+ * @retval -EINVAL for failure.
  */
 int nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
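The get/set pair above is a read-modify-write API over a single PTE; all three signatures are in this header. An editorial sketch of the pattern (the bit being cleared is purely illustrative, not the real PTE layout):

/* Editorial sketch: fetch a PTE, tweak it, write it back. */
static int pte_rmw_sketch(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr)
{
	u32 pte[8]; /* large enough for the sketch; verified below */
	int err;

	if (nvgpu_pte_words(g) > 8U)
		return -EINVAL;

	err = nvgpu_get_pte(g, vm, vaddr, pte);
	if (err != 0)
		return err;

	pte[0] &= ~1U; /* illustrative only: clear an assumed "valid" bit */

	/* No page tables are created here: vaddr must already be mapped. */
	return nvgpu_set_pte(g, vm, vaddr, pte);
}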
@@ -431,47 +448,53 @@ int nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
 /**
  * @brief Mutex Locked version of map memory routine.
  *
- * @param vm [in]	Pointer to virtual memory structure.
- * @param vaddr [in]	GPU virtual address.
- * @param sgt [in]	Pointer to scatter gather table for
- *			direct "physical" nvgpu_mem structures.
+ * @param vm [in]		Pointer to virtual memory structure.
+ * @param vaddr [in]		GPU virtual address.
+ * @param sgt [in]		Pointer to scatter gather table for
+ *				direct "physical" nvgpu_mem structures.
  * @param buffer_offset [in]	Offset address from start of the memory.
- * @param size [in]	Size of the buffer in bytes.
- * @param pgsz_idx [in]	Index into the page size table.
- *			- Min: GMMU_PAGE_SIZE_SMALL
- *			- Max: GMMU_PAGE_SIZE_KERNEL
- * @param kind_v [in]	Kind attributes for mapping.
+ * @param size [in]		Size of the buffer in bytes.
+ * @param pgsz_idx [in]		Index into the page size table.
+ *				- Min: GMMU_PAGE_SIZE_SMALL
+ *				- Max: GMMU_PAGE_SIZE_KERNEL
+ * @param kind_v [in]		Kind attributes for mapping.
  * @param ctag_offset [in]	Size of the buffer in bytes.
  * @param flags [in]	Mapping flags.
  *			- Min: NVGPU_VM_MAP_FIXED_OFFSET
  *			- Max: NVGPU_VM_MAP_PLATFORM_ATOMIC
- * @param rw_flag [in]	Flag designates the requested GMMU mapping.
+ * @param rw_flag [in]		Flag designates the requested GMMU mapping.
+ *				- Min: gk20a_mem_flag_none
+ *				- Max: gk20a_mem_flag_write_only
  * @param clear_ctags [in]	True if ctags clear is required.
- * @param sparse [in]	True if the mapping should be sparse.
- * @param priv [in]	True if the mapping should be Privileged.
- * @param batch [in]	Mapping_batch handle. Structure which track
- *			whether the L2 flush and TLB invalidate is
- *			required or not during map/unmap.
- * @param aperture [in]	Where the memory actually was allocated from.
+ * @param sparse [in]		True if the mapping should be sparse.
+ * @param priv [in]		True if the mapping should be Privileged.
+ * @param batch [in]		Mapping_batch handle. Structure which tracks
+ *				whether the L2 flush and TLB invalidate are
+ *				required or not during map/unmap.
+ * @param aperture [in]		Where the memory actually was allocated from.
+ *				- Min: APERTURE_SYSMEM
+ *				- Max: APERTURE_VIDMEM
  *
  * Native GPU "HAL" functions for GMMU Map.
  *
  * Locked version of GMMU Map routine:
- * - Decodes the Mapping flags, rw_flag, priv and aperture for GMMU mapping.
- * - Allocates a new GPU VA range for a specific size if vaddr is 0.
- *   #nvgpu_vm_alloc_va() reserves the GPU VA.
- * - Program PDE and PTE entry with PA/IPA, mapping flags, rw_flag and aperture
- *   information. #nvgpu_gmmu_update_page_table does the pde and pte updates.
- * - Chip specific stuff is handled at the PTE/PDE programming HAL layer.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b, gp10b).
- * - Invalidates the GPU TLB, gm20b_fb_tlb_invalidate does the tlb invalidate.
+ * Decodes the mapping flags, rw_flag, priv and aperture for the GMMU mapping.
+ * Allocates a new GPU VA range for a specific size if vaddr is 0.
+ * #nvgpu_vm_alloc_va() reserves the GPU VA.
+ * Programs the PDE and PTE entries with PA/IPA, mapping flags, rw_flag and
+ * aperture information. #nvgpu_gmmu_update_page_table does the pde and pte
+ * updates.
+ * Chip specific stuff is handled at the PTE/PDE programming HAL layer.
+ * GMMU level entry format will be different for each GPU family
+ * (i.e., gv11b, gp10b).
+ * Invalidates the GPU TLB; gm20b_fb_tlb_invalidate does the tlb invalidate.
  *
- * @return valid GMMU VA start address in case of success.
- *         0 in case of all possible failures.
- *         Possible Failure cases:
- *         - No free GPU VA space (GPU VA space full).
- *         - TLB invalidate timeout.
+ * @return valid GMMU VA start address in case of success.
+ * @retval 0 in case of all possible failures.
+ *         Possible failure cases:
+ *         - No free GPU VA space (GPU VA space full).
+ *         - TLB invalidate timeout.
+ *         - Any invalid input parameters.
+ *         - Failure inside any of the called functions.
  */
 u64 nvgpu_gmmu_map_locked(struct vm_gk20a *vm,
			  u64 vaddr,
@@ -500,6 +523,8 @@ u64 nvgpu_gmmu_map_locked(struct vm_gk20a *vm,
  *			- Max: GMMU_PAGE_SIZE_KERNEL
  * @param va_allocated [in]	Indicates if gpu_va address is valid/allocated.
  * @param rw_flag [in]	Flag designates the requested GMMU mapping.
+ *			- Min: gk20a_mem_flag_none
+ *			- Max: gk20a_mem_flag_write_only
  * @param sparse [in]	True if the mapping should be sparse.
  * @param batch [in]	Mapping_batch handle. Structure which track
  *			whether the L2 flush and TLB invalidate is
@@ -508,17 +533,17 @@ u64 nvgpu_gmmu_map_locked(struct vm_gk20a *vm,
  * Native GPU "HAL" functions for GMMU Unmap.
  *
  * Locked version of GMMU Unmap routine:
- * - Free the reserved GPU VA space staring at @gpu_va.
- *   #nvgpu_vm_free_va does free the GPU VA space.
- * - Program PDE and PTE entry with default information which is internally
- *   frees up the GPU VA space.
- * - Chip specific stuff is handled at the PTE/PDE programming HAL layer.
- *   GMMU level entry format will be different for each GPU family
- *   (i.e, gv11b).
- * - Flush the GPU L2. gv11b_mm_l2_flush does the L2 flush.
- * - Invalidates the GPU TLB, gm20b_fb_tlb_invalidate does the tlb invalidate.
+ * Frees the reserved GPU VA space starting at \a gpu_va.
+ * #nvgpu_vm_free_va does free the GPU VA space.
+ * Programs the PDE and PTE entries with default information, which
+ * internally frees up the GPU VA space.
+ * Chip specific stuff is handled at the PTE/PDE programming HAL layer.
+ * GMMU level entry format will be different for each GPU family
+ * (i.e., gv11b).
+ * Flushes the GPU L2. gv11b_mm_l2_flush does the L2 flush.
+ * Invalidates the GPU TLB; gm20b_fb_tlb_invalidate does the tlb invalidate.
  *
- * @return None.
+ * @return None.
  */
 void nvgpu_gmmu_unmap_locked(struct vm_gk20a *vm,
			     u64 vaddr,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h b/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
index 16b166775..29bbc456e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -83,16 +83,20 @@ struct nvgpu_gmmu_pd {
  * @param bytes [in]	PD size.
  *
  * Allocates a page directory:
- * - Allocates the DMA memory for a page directory.
- *   This handles the necessary PD cache logistics. Since Parker and
- *   later GPUs, some of the page directories are smaller than a page.
- *   Hence, packing these PDs together saves a lot of memory.
- * - If PD is bigger than a page just do a regular DMA alloc.
- *   #nvgpu_pd_cache_alloc_direct() does the pd cache allocation.
+ * Allocates the DMA memory for a page directory.
+ * This handles the necessary PD cache logistics. Since Parker and
+ * later GPUs, some of the page directories are smaller than a page.
+ * Hence, packing these PDs together saves a lot of memory.
+ * If the PD is bigger than a page, just do a regular DMA alloc.
+ * #nvgpu_pd_cache_alloc_direct() does the pd cache allocation.
 *
 *
- * @return 0 in case of success.
- *         -ENOMEM (< 0) in case of failure.
+ * @return 0 in case of success.
+ * @retval -ENOMEM in case of failure. The reason can be any one
+ *         of the following:
+ *         - kzalloc failure.
+ *         - failures internal to the dma_alloc* functions.
+ *
 */
 int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
 
@@ -103,10 +107,11 @@ int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
  * @param pd [in]	Pointer to pd_cache memory structure.
  *
  * Free the Page Directory DMA memory:
- * - Free the DMA memory allocated using nvgpu_pd_alloc.
- *   #nvgpu_pd_cache_free_direct() frees the pd cache.
+ * Free the DMA memory allocated using nvgpu_pd_alloc() by
+ * calling #nvgpu_pd_cache_free_direct().
+ * Call #nvgpu_pd_cache_free() if the pd is cached.
 *
- * @return None
+ * @return None.
 */
 void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
 
@@ -116,11 +121,13 @@ void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
  * @param g [in]	The GPU.
  *
  * Initialize the pd_cache:
- * - Allocates the zero initialized memory area for #nvgpu_pd_cache.
- * - Initializes the mutexes and list nodes for pd_cache tracking stuff.
+ * Allocates the zero initialized memory area for #nvgpu_pd_cache.
+ * Initializes the mutexes and list nodes for pd_cache tracking stuff.
+ * Checks mm.pd_cache to make sure the pd_cache is not initialized
+ * twice.
 *
- * @return 0 in case of success.
- *         -ENOMEM (< 0) in case of failure.
+ * @return 0 in case of success.
+ * @retval -ENOMEM in case of kzalloc failure.
 */
 int nvgpu_pd_cache_init(struct gk20a *g);
 
@@ -130,11 +137,12 @@ int nvgpu_pd_cache_init(struct gk20a *g);
  * @param g [in]	The GPU.
  *
  * Free the pd_cache:
- * - Reset the list nodes used for pd_cache tracking stuff.
- * - Free the #nvgpu_pd_cache internal structure allocated
- *   by nvgpu_pd_cache_init().
+ * Reset the list nodes used for pd_cache tracking stuff.
+ * Free the #nvgpu_pd_cache internal structure allocated
+ * by nvgpu_pd_cache_init().
+ * Reset mm.pd_cache to NULL.
 *
- * @return None
+ * @return None.
 */
 void nvgpu_pd_cache_fini(struct gk20a *g);
 
@@ -149,10 +157,10 @@ void nvgpu_pd_cache_fini(struct gk20a *g);
  *			- Max: GMMU_PAGE_SIZE_KERNEL
  *
  * Compute the pd offset:
- * - ((@pd_idx * GMMU level entry size / 4).
+ * (@pd_idx * GMMU level entry size) / 4.
 *
- * @return valid pd offset in case of valid @pd_idx.
- *         Invalid pd offset in case of invalid/random @pd_idx.
+ * @return pd offset at \a pd_idx.
+ *
 */
 u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx);
 
@@ -165,10 +173,10 @@ u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx);
  * @param data [in]	Data to write into pd mem.
  *
  * Write data content into pd mem:
- * - Offset = ((start address of the pd / 4 + @w).
- * - Write data content into offset address.
+ * Offset = (start address of the pd) / 4 + @w.
+ * Write the data content to the offset address by calling #nvgpu_mem_wr32().
 *
- * @return None
+ * @return None.
 */
 void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
		    size_t w, u32 data);
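The two offset formulas above are plain word arithmetic. An editorial sketch with an assumed entry size of 8 bytes, i.e. two 32-bit words per entry (the real size comes from the gk20a_mmu_level table):

#include <stdint.h>

#define ENTRY_SIZE_BYTES 8U /* placeholder; chip-specific in practice */

/* Word offset of PD entry @pd_idx: (pd_idx * entry size) / 4. */
static uint32_t pd_offset_sketch(uint32_t pd_idx)
{
	return (pd_idx * ENTRY_SIZE_BYTES) / 4U;
}

A caller would then combine this with nvgpu_pd_write(), e.g. nvgpu_pd_write(g, pd, (size_t)(pd_offset_sketch(idx) + word), data) to store one word of an entry.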
@@ -180,15 +188,30 @@ void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
  * @param pd [in]	Pointer to GMMU page directory structure.
  *
  * Write data content into pd mem:
- * - Return the _physical_ address of a page directory for GMMU programming.
- * - PD base in context inst block.
- *   #nvgpu_mem_get_addr returns the _physical_ address of pd mem.
+ * Return the _physical_ address of a page directory for GMMU programming,
+ * e.g. the PD base in the context inst block.
+ * #nvgpu_mem_get_addr returns the _physical_ address of pd mem.
 *
- * @return valid pd physical address in case of valid pd mem.
- *         Invalid pd physical address in case of invalid/random pd mem.
+ * @return pd physical address in case of valid pd mem.
+ * @retval Zero in case of invalid/random pd mem.
 */
 u64 nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
 
+/**
+ * @brief Allocate memory for a page directory.
+ *
+ * @param g [in]	The GPU.
+ * @param pd [in]	Pointer to GMMU page directory structure.
+ * @param bytes [in]	PD size in bytes.
+ *
+ * - Set NVGPU_DMA_PHYSICALLY_ADDRESSED if \a bytes is more than
+ *   NVGPU_CPU_PAGE_SIZE.
+ * - Call #nvgpu_dma_alloc_flags() to allocate DMA-able memory for
+ *   the pd.
+ *
+ * @return 0 in case of success.
+ * @retval -ENOMEM For any allocation failure.
+ */
 int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
				struct nvgpu_gmmu_pd *pd, u32 bytes);
+
 #endif