linux-nvgpu/drivers/gpu/nvgpu/include/nvgpu/gops/mm.h
Martin Radev 81d95456b9 gpu: nvgpu: Rename PLATFORM_ATOMIC to SYSTEM_COHERENT
To support current and future use cases, it would be
beneficial to select the SYSTEM_COHERENT aperture explicitly.

The benefits are:
- platform atomic code is cleaned up.
- userspace can select the SYSTEM_COHERENT aperture for any
  specific use case.

Bug 3959027

Change-Id: I6489ebe87fa75cc760930277bad5e0cacca80eb6
Signed-off-by: Martin Radev <mradev@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2864177
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
2023-04-21 11:31:53 -07:00

/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GOPS_MM_H
#define NVGPU_GOPS_MM_H
/**
* @file
*
* MM HAL interface.
*/
#include <nvgpu/types.h>
struct gk20a;
/**
* This structure describes the HAL functions related to
* GMMU fault handling.
*/
struct gops_mm_mmu_fault {
/**
* @brief HAL to initialize the software setup of
* GMMU fault buffer.
*
* @param g [in] The GPU.
*
* Initializes the software setup of the GMMU fault buffer:
* - Initializes the hub isr mutex to avoid races when nvgpu
* software reads or writes the GMMU fault buffer.
* - Allocates memory to store the non-replayable
* GMMU fault information.
*
* @return 0 in case of success, < 0 in case of failure.
* Possible failure case:
* - Insufficient system memory (ENOMEM).
*/
int (*setup_sw)(struct gk20a *g);
/**
* @brief HAL to initialize the hardware setup of
* GMMU fault buffer.
*
* @param g [in] The GPU.
*
* Initializes the hardware setup of GMMU fault buffer:
* - Configures the GMMU fault buffer base address and its
* size information in fbhubmmu specific BAR0 register.
*/
void (*setup_hw)(struct gk20a *g);
/**
* @brief HAL to free the GMMU fault buffer.
*
* @param g [in] The GPU.
*
* Free the GMMU fault buffer:
* - Frees the GMMU fault buffer memory.
* - Destroys the hub isr mutex.
*/
void (*info_mem_destroy)(struct gk20a *g);
/**
* @brief HAL to disable the hardware setup of GMMU
* fault buffer.
*
* @param g [in] The GPU.
*
* Disable the hardware setup of GMMU fault buffer.
*/
void (*disable_hw)(struct gk20a *g);
/**
* @brief HAL to parse mmu fault info read from h/w.
*
* @param mmufault [in] Pointer to memory containing info
* to be parsed.
*
*/
void (*parse_mmu_fault_info)(struct mmu_fault_info *mmufault);
};
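/*
* Illustrative sketch (not part of the original header): the typical
* fault-buffer lifecycle a caller might drive through these HALs. It
* assumes "g" is a valid gk20a pointer and the chip HAL has populated
* g->ops.mm.mmu_fault.
*
*    int err = g->ops.mm.mmu_fault.setup_sw(g);   // s/w state first
*    if (err == 0) {
*        g->ops.mm.mmu_fault.setup_hw(g);         // then program h/w
*    }
*    // ... on teardown, reverse the order:
*    g->ops.mm.mmu_fault.disable_hw(g);
*    g->ops.mm.mmu_fault.info_mem_destroy(g);
*/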
/**
* This structure describes the HAL functions related to
* fb and L2 hardware operations.
*/
struct gops_mm_cache {
/**
* @brief HAL to flush the frame buffer memory.
*
* @param g [in] The GPU.
*
* Flush the frame buffer memory:
* - Flushes the FB. Then, waits for completion (by polling)
* up to the polling timeout.
*
* @return 0 in case of success, < 0 in case of failure.
* Possible failure case:
* - CPU polling timeout during FB flush operation (-EBUSY).
*/
int (*fb_flush)(struct gk20a *g);
/**
* @brief HAL to invalidate the L2.
*
* @param g [in] The GPU.
*
* Invalidate the L2:
* - Triggers the L2 invalidate operation. Then, waits for
* completion (by polling) up to the polling timeout.
*
* Note: This HAL does not return an error, but CPU polling can
* time out.
*/
void (*l2_invalidate)(struct gk20a *g);
/**
* @brief HAL to flush and invalidate the L2 and fb.
*
* @param g [in] The GPU.
* @param invalidate [in] true if L2 invalidate is also
* required.
*
* Flush and invalidate the L2 and fb:
* - Triggers the fb flush operation. Then, waits for completion
* (by polling) up to the polling timeout.
* - Triggers the L2 invalidate operation. Then, waits for
* completion (by polling) up to the polling timeout.
*
* @return 0 in case of success, < 0 in case of failure.
* Possible failure cases:
* - CPU polling timeout during FB flush operation (-EBUSY).
* - CPU polling timeout during L2 invalidate operation
* (-EBUSY).
*/
int (*l2_flush)(struct gk20a *g, bool invalidate);
#ifdef CONFIG_NVGPU_COMPRESSION
/**
* @brief HAL to flush Compression Bit Cache memory.
*
* @param g [in] The GPU.
*
* Flush the Compression Bit Cache (CBC):
* - Flushes all dirty lines from the CBC to L2.
* Then, waits for completion (by polling) up to the
* polling timeout.
*
* Note: This HAL does not return an error, but CPU polling
* can time out.
*/
void (*cbc_clean)(struct gk20a *g);
#endif
};
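/*
* Illustrative sketch (not part of the original header): an
* error-checked flush sequence through these HALs, assuming "g" is a
* valid gk20a pointer.
*
*    // Flush the FB, then flush and invalidate the L2 in one call.
*    if (g->ops.mm.cache.fb_flush(g) != 0 ||
*        g->ops.mm.cache.l2_flush(g, true) != 0) {
*        // CPU polling timed out (-EBUSY); handle or propagate.
*    }
*/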
/**
* This structure describes the HAL functions related to
* GMMU operations.
*/
struct gops_mm_gmmu {
/**
* @brief HAL to get the GMMU level mapping info structure.
*
* @param g [in] The GPU.
* @param big_page_size [in] Big page size supported by GMMU.
*
* @return Pointer to GMMU level mapping info structure.
*/
const struct gk20a_mmu_level *
(*get_mmu_levels)(struct gk20a *g, u64 big_page_size);
/**
* @brief HAL to get maximum page table levels supported by the
* GMMU HW.
*
* @param g [in] The GPU.
*
* @return Page table levels supported by GPU.
*/
u32 (*get_max_page_table_levels)(struct gk20a *g);
/**
* @brief Map #sgt into the GPU address space described by #vm.
*
* @param vm [in] Pointer to virtual memory
* structure.
* @param map_offset [in] GPU virtual address.
* @param sgt [in] Pointer to scatter gather table
* for direct "physical" nvgpu_mem
* structures.
* @param buffer_offset [in] Offset address from start of
* the memory.
* @param size [in] Size of the buffer in bytes.
* @param pgsz_idx [in] Index into the page size table.
* - Min: GMMU_PAGE_SIZE_SMALL
* - Max: GMMU_PAGE_SIZE_KERNEL
* @param kind_v [in] Kind attributes for mapping.
* @param ctag_offset [in] Compression tag (ctag) offset
* for the mapping.
* @param flags [in] Mapping flags.
* Min: NVGPU_VM_MAP_FIXED_OFFSET
* Max: NVGPU_VM_MAP_SYSTEM_COHERENT
* @param rw_flag [in] Flag designates the requested
* GMMU mapping.
* @param clear_ctags [in] True if ctags clear is required.
* @param sparse [in] True if the mapping should be
* sparse.
* @param priv [in] True if the mapping should be
* privileged.
* @param batch [in] Mapping batch handle. Structure
* which tracks whether the L2 flush
* and TLB invalidate are required
* during map/unmap.
* @param aperture [in] Where the memory actually was
* allocated from.
*
* Locked version of the GMMU map routine:
* - Decodes the mapping flags, rw_flag, priv and aperture for
* the GMMU mapping.
* - Allocates a new GPU VA range for a specific size
* if map_offset is 0.
* #nvgpu_vm_alloc_va() reserves the GPU VA.
* - Programs the PDE and PTE entries with PA/IPA, mapping flags,
* rw_flag and aperture information.
* #nvgpu_gmmu_update_page_table() performs the PDE and PTE
* updates.
* - Chip-specific details are handled in the PTE/PDE
* programming HAL layer.
* The GMMU level entry format differs for each
* GPU family (e.g., gv11b, gp10b).
* - Invalidates the GPU TLB; #gm20b_fb_tlb_invalidate() performs
* the TLB invalidate.
*
* @return Valid GMMU VA start address in case of success.
* @return 0 in case of failure.
* Possible Failure cases:
* - No free GPU VA space (GPU VA space full).
* - TLB invalidate timeout.
*/
u64 (*map)(struct vm_gk20a *vm,
u64 map_offset,
struct nvgpu_sgt *sgt,
u64 buffer_offset,
u64 size,
u32 pgsz_idx,
u8 kind_v,
u32 ctag_offset,
u32 flags,
enum gk20a_mem_rw_flag rw_flag,
bool clear_ctags,
bool sparse,
bool priv,
struct vm_gk20a_mapping_batch *batch,
enum nvgpu_aperture aperture);
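/*
* Illustrative sketch (not part of the original header): a minimal map
* call that lets nvgpu pick the VA (map_offset == 0). "g", "vm", "sgt"
* and "size" are assumed valid; gk20a_mem_flag_none and APERTURE_SYSMEM
* are assumed to be the usual nvgpu enum values for a plain read/write
* sysmem mapping.
*
*    u64 gpu_va = g->ops.mm.gmmu.map(vm,
*            0ULL,                  // map_offset: 0 => allocate a VA
*            sgt, 0ULL, size,
*            GMMU_PAGE_SIZE_SMALL,
*            0U,                    // kind_v
*            0U,                    // ctag_offset
*            0U,                    // flags
*            gk20a_mem_flag_none,   // rw_flag
*            false, false, false,   // clear_ctags, sparse, priv
*            NULL,                  // batch
*            APERTURE_SYSMEM);
*    if (gpu_va == 0ULL) {
*        // VA space exhausted or TLB invalidate timed out.
*    }
*/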
/**
* @brief Unmap #vaddr into the GPU address space described by #vm.
*
* @param vm [in] Pointer to virtual memory
* structure.
* @param vaddr [in] GPU virtual address.
* @param size [in] Size of the buffer in bytes.
* @param pgsz_idx [in] Index into the page size table.
* - Min: GMMU_PAGE_SIZE_SMALL
* - Max: GMMU_PAGE_SIZE_KERNEL
* @param va_allocated [in] Indicates if gpu_va address is
* valid/allocated.
* @param rw_flag [in] Flag designates the requested
* GMMU mapping.
* @param sparse [in] True if the mapping should be
* sparse.
* @param batch [in] Mapping batch handle. Structure
* which tracks whether the L2 flush
* and TLB invalidate are required
* during map/unmap.
*
* Locked version of the GMMU unmap routine:
* - Frees the reserved GPU VA space starting at #vaddr.
* #nvgpu_vm_free_va() frees the GPU VA space.
* - Programs the PDE and PTE entries with default (invalid)
* information, which tears down the mapping.
* - Chip-specific details are handled in the PTE/PDE
* programming HAL layer.
* The GMMU level entry format differs for
* each GPU family (e.g., gv11b).
* - Flushes the GPU L2; #gv11b_mm_l2_flush() performs the
* L2 flush.
* - Invalidates the GPU TLB; #gm20b_fb_tlb_invalidate() performs
* the TLB invalidate.
*/
void (*unmap)(struct vm_gk20a *vm,
u64 vaddr,
u64 size,
u32 pgsz_idx,
bool va_allocated,
enum gk20a_mem_rw_flag rw_flag,
bool sparse,
struct vm_gk20a_mapping_batch *batch);
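/*
* Illustrative sketch (not part of the original header): unmapping the
* VA obtained in the map sketch above. va_allocated is true because
* the VA was reserved by nvgpu rather than fixed by the caller.
*
*    g->ops.mm.gmmu.unmap(vm, gpu_va, size,
*            GMMU_PAGE_SIZE_SMALL,
*            true,                  // va_allocated
*            gk20a_mem_flag_none,
*            false,                 // sparse
*            NULL);                 // batch
*/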
/**
* @brief HAL to get the available big page sizes.
*
* Get the available big page sizes:
* - Bitwise OR of all available big page sizes.
* - Big page size will be different for each GPU family
* (e.g., gv11b, tu104).
*
* @return Valid bitwise OR of all available big page sizes
* if big page support is enabled.
* @return 0 if big page support is disabled.
*/
u32 (*get_big_page_sizes)(void);
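/*
* Illustrative sketch (not part of the original header): probing the
* returned bitmask for 64K big-page support. SZ_64K is assumed to be
* available (e.g. via <linux/sizes.h>).
*
*    u32 sizes = g->ops.mm.gmmu.get_big_page_sizes();
*    bool has_64k = (sizes & SZ_64K) != 0U;
*/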
/**
* @brief HAL to get the default big page size in bytes.
*
* Default big page size:
* - Big page size will be different for each GPU family
* (e.g., gv11b, tu104).
*
* @return Valid big page size if big page support is enabled.
* @return 0 if big page support is disabled.
*/
u32 (*get_default_big_page_size)(void);
/**
* @brief HAL to get the iommu physical bit position.
*
* @param g [in] The GPU.
*
* This HAL is used to get the iommu physical bit position.
*
* @return iommu physical bit position.
*/
u32 (*get_iommu_bit)(struct gk20a *g);
/**
* @brief HAL to get the tegra_raw physical bit position.
*
* @param g [in] The GPU.
*
* This HAL is used to get the tegra_raw physical bit position.
*
* @return tegra_raw physical bit position.
*/
u32 (*get_gpu_phys_tegra_raw_bit)(struct gk20a *g);
/**
* @brief HAL to convert from tegra_phys to gpu_phys.
*
* @param g [in] The GPU.
* @param attrs [in] Pointer to gmmu attributes.
* @param phys [in] Tegra physical address.
*
* This HAL is used to convert from tegra_phys to gpu_phys
* for GMMU programming.
*
* Notes:
* On Volta the GPU determines whether to do L3 allocation
* for a mapping by checking bit 36 of the physical address.
* So if a mapping should allocate lines in the L3 then
* this bit must be set.
*
* @return gpu physical address for GMMU programming.
*/
u64 (*gpu_phys_addr)(struct gk20a *g,
struct nvgpu_gmmu_attrs *attrs,
u64 phys);
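/*
* Illustrative sketch (not part of the original header): per the Volta
* note above, a chip implementation might set bit 36 when an L3
* allocation is requested. l3_alloc_requested() is a hypothetical
* predicate on the gmmu attributes.
*
*    u64 gpu_pa = phys;
*    if (l3_alloc_requested(attrs)) {   // hypothetical helper
*        gpu_pa |= 1ULL << 36;          // request L3 line allocation
*    }
*    return gpu_pa;
*/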
};
struct gops_mm {
/**
* @brief HAL to initialize an internal structure which is used to
* track pd_cache.
*
* @param g [in] The GPU.
*
* Initialize the pd_cache:
* - Allocates the zero-initialized memory area for #nvgpu_pd_cache.
* - Initializes the mutexes and list nodes for pd_cache
* tracking.
*
* @return 0 in case of success.
* @return -ENOMEM (< 0) in case of failure.
* Possible failure case:
* - Insufficient system memory (ENOMEM).
*/
int (*pd_cache_init)(struct gk20a *g);
/**
* @brief This HAL function initializes the Memory Management unit.
* @sa #nvgpu_init_mm_support().
*
* @param g [in] The GPU.
*
* MM init:
* - MM S/W init:
* - Resets the current pramin window index to 0.
* - Initializes the vidmem page allocator with size, flags, etc.
* - Allocates vidmem memory for acr blob from bootstrap region.
* - Creates the CE vidmem clear thread for vidmem clear operations
* during vidmem free.
* - Allocates memory for sysmem flush operation.
* - Initializes the GMMU virtual memory region for BAR1.
* - Allocates and initializes the BAR1 instance block.
* - Initializes the GMMU virtual memory region for PMU.
* - Allocates and initializes the PMU instance block.
* - Initializes the GMMU virtual memory region for CE.
* - Allocates the GMMU debug write and read buffer (4K size).
* - MM H/W setup:
* - Configures the GMMU debug buffer location in fbhubmmu register.
* - Enables the fbhubmmu mc interrupt.
* - Binds the BAR1 inst block and checks whether the bind
* operation is successful.
* - Flushes the FB. Then, waits for completion (by polling)
* up to the polling timeout.
* - Configures the GMMU fault buffer location in fbhubmmu register.
*
* @return 0 in case of success, < 0 in case of failure.
* @retval Error returned by setup_sw and setup_hw routines.
* Possible failure cases:
* - Insufficient system memory (ENOMEM).
* - CPU polling timeout during FB flush operation (-EBUSY).
*/
int (*init_mm_support)(struct gk20a *g);
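/*
* Illustrative sketch (not part of the original header): a caller
* checking MM bring-up, assuming "g" is a valid gk20a pointer.
*
*    int err = g->ops.mm.init_mm_support(g);
*    if (err != 0) {
*        // -ENOMEM from s/w init or -EBUSY from the FB flush step.
*    }
*/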
/**
* @brief HAL to suspend the Memory Management unit.
*
* @param g [in] The GPU.
*
* Suspend MM unit:
* - Pauses the CE vidmem clear thread.
* - Flushes the FB and L2. Then, waits for completion (by polling)
* up to the polling timeout.
* - Invalidates L2.
* - Disables the fbhubmmu mc interrupt.
* - Disables the mmu fault buffer h/w setup.
*
* @return 0 in case of success, < 0 in case of failure.
* Possible failure case:
* - CPU polling timeout during FB or L2 flush operation (-EBUSY).
*/
int (*mm_suspend)(struct gk20a *g);
/**
* @brief HAL to bind the virtual memory context to the given channel.
*
* @param vm [in] Pointer to virtual memory context.
* @param ch [in] Pointer to nvgpu_channel.
*
* Bind a channel:
* - Increments the reference count of the virtual memory context.
* - Assigns the virtual memory context to the channel's virtual
* memory context.
* - Programs the GPU hardware blocks with addresses
* associated with the virtual memory context.
*
* @return 0, always.
*/
int (*vm_bind_channel)(struct vm_gk20a *vm, struct nvgpu_channel *ch);
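/*
* Illustrative sketch (not part of the original header): binding a
* channel to an address space, assuming "vm" and "ch" are valid.
*
*    int err = g->ops.mm.vm_bind_channel(vm, ch);
*    (void)err;   // documented above to always be 0
*/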
/**
* @brief HAL to setup the Memory Management hardware.
*
* @param g [in] The GPU.
*
* MM hardware setup:
* - Configures the GMMU debug buffer location in fbhubmmu register.
* - Enables the fbhubmmu mc interrupt.
* - Binds the BAR1 inst block and checks whether the bind
* operation is successful.
* - Flushes the FB. Then, waits for completion (by polling)
* up to the polling timeout.
* - Configures the GMMU fault buffer location in fbhubmmu register.
*
* @return 0 in case of success, < 0 in case of failure.
* Possible failure cases:
* - Insufficient system memory (ENOMEM).
* - CPU polling timeout during FB flush operation (-EBUSY).
*/
int (*setup_hw)(struct gk20a *g);
/**
* @brief HAL to get the BAR1 aperture availability status.
*
* @param g [in] The GPU.
*
* BAR1 status:
* - false for gv11b.
*
* @return True if BAR1 aperture support is available.
* @return False if BAR1 aperture support is not available.
*/
bool (*is_bar1_supported)(struct gk20a *g);
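/*
* Illustrative sketch (not part of the original header): gating
* BAR1-dependent setup on this HAL (false on gv11b, per the note
* above).
*
*    if (g->ops.mm.is_bar1_supported(g)) {
*        // e.g. map USERD through BAR1 via bar1_map_userd().
*    }
*/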
/**
* @brief HAL to get the BAR2 virtual memory size in bytes.
*
* @param g [in] The GPU.
*
* - Returns the size needed for mapping the engine method buffers,
* VAB and fault buffers in the BAR2 VM.
*
* @return size in bytes.
*/
u32 (*bar2_vm_size)(struct gk20a *g);
/**
* @brief HAL to initialize the BAR2 virtual memory.
*
* @param g [in] The GPU.
*
* Initialize BAR2:
* - Initializes the GMMU virtual memory region for BAR2.
* - Allocates and initializes the BAR2 instance block.
*
* @return 0 in case of success, < 0 in case of failure.
* Possible failure case:
* - Insufficient system memory (ENOMEM).
*/
int (*init_bar2_vm)(struct gk20a *g);
/**
* @brief HAL to free the BAR2 virtual memory.
*
* @param g [in] The GPU.
*
* Free the BAR2 VM:
* - Frees the BAR2 instance block.
* - Frees the BAR2 GMMU virtual memory region.
*/
void (*remove_bar2_vm)(struct gk20a *g);
/**
* @brief HAL to initialize the instance block memory for user channels.
*
* @param inst_block [in] Pointer to channel instance block
* memory.
* @param vm [in] Pointer to virtual memory context.
* @param big_page_size [in] Big page size supported by GMMU.
*
* Initializes the instance block memory:
* - Configures the pdb base and big page size in the channel's
* instance block memory.
*/
void (*init_inst_block)(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size);
/**
* @brief HAL to initialize the instance block memory for GPU contexts
* for h/w units (fecs, pmu, hwpm, bar1, bar2, sec2, gsp and
* perfbuf).
*
* @param inst_block [in] Pointer to instance block memory.
* @param vm [in] Pointer to virtual memory context.
* @param big_page_size [in] Big page size supported by GMMU.
*
* Initializes the instance block memory:
* - Configures the pdb base, big page size and
* 0th subcontext's pdb base in the context's instance block memory.
*/
int (*init_inst_block_core)(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size);
/**
* @brief HAL to get the maximum flush retry counts.
*
* @param g [in] The GPU.
* @param op [in] Requested operations on various unit.
*
* Get the maximum retry flush counts (retry timer) for the
* following operations:
* - Flush the Frame Buffer memory.
* - L2 Cache Flush.
*
* These retry counts are specific to the GPU hardware and vary
* based on the size of the frame buffer memory.
*
* @return Maximum flush retry counts for a specific h/w operation.
*/
u32 (*get_flush_retries)(struct gk20a *g, enum nvgpu_flush_op op);
/**
* @brief HAL to get default virtual memory sizes.
*
* @param aperture_size [in] Pointer to aperture size.
* @param user_size [in] Pointer to user size.
* @param kernel_size [in] Pointer to kernel size.
*
* The number of bits in the virtual address space can vary across
* chips. This HAL returns the default sizes for the virtual
* address spaces.
*/
void (*get_default_va_sizes)(u64 *aperture_size,
u64 *user_size, u64 *kernel_size);
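/*
* Illustrative sketch (not part of the original header): querying the
* default VA split; all three out-parameters are passed since the
* contract above does not state that NULL is accepted.
*
*    u64 aperture = 0ULL, user = 0ULL, kernel = 0ULL;
*    g->ops.mm.get_default_va_sizes(&aperture, &user, &kernel);
*/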
/** @cond DOXYGEN_SHOULD_SKIP_THIS */
u64 (*bar1_map_userd)(struct gk20a *g, struct nvgpu_mem *mem,
u32 offset);
int (*vm_as_alloc_share)(struct gk20a *g, struct vm_gk20a *vm);
void (*vm_as_free_share)(struct vm_gk20a *vm);
/** @endcond DOXYGEN_SHOULD_SKIP_THIS */
struct gops_mm_mmu_fault mmu_fault;
struct gops_mm_cache cache;
struct gops_mm_gmmu gmmu;
};
#endif