gpu: nvgpu: update doxygen for common.ltc unit

Update the documentation as per SWUD feedback for common.ltc unit.

JIRA NVGPU-6982

Change-Id: I0a8406791bef2094bcd2804546db46378a269bb3
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2608663
(cherry picked from commit dd1198870b4dbef5d4731fd5d292188c268967b6)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2633960
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Sagar Kamble
2021-10-11 17:08:47 +05:30
committed by mobile promotions
parent 6a6562cd4d
commit da3c83cd5e
3 changed files with 414 additions and 97 deletions

View File

@@ -51,18 +51,17 @@ static int gm20b_ltc_wait_for_clean(struct gk20a *g)
/*
* Use 5ms - this should be sufficient time to flush the cache.
* On tegra, rough EMC BW available for old tegra chips (newer
* chips are strictly faster) can be estimated as follows:
* On tegra, rough EMC BW available can be estimated as follows:
*
* Lowest reasonable EMC clock speed will be around 102MHz on
* t124 for display enabled boards and generally fixed to max
* Lowest reasonable EMC clock speed will be around 204MHz on
* t234 for display enabled boards and generally fixed to max
* for non-display boards (since they are generally plugged in).
*
* Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that
* Thus, the available BW is 128B * 2 * 204MHz = ~52GB/s. Of that
* BW the GPU will likely get about half (display and overhead/
* utilization inefficiency eating the rest) so 650MB/s at
* utilization inefficiency eating the rest) so 26GB/s at
* worst. Assuming at most 1MB of GPU L2 cache (less for most
* chips) worst case is we take 1MB/650MB/s = 1.5ms.
* chips) worst case is we take 1MB/26GB/s = 38us.
*
* So 5ms timeout here should be more than sufficient.
*/

View File

@@ -42,10 +42,174 @@ struct gops_ltc_intr {
/**
* @brief ISR for handling ltc interrupts.
*
* @param g [in] Pointer to GPU driver struct.
* @param ltc [in] LTC unit number
* @param g [in] - The GPU driver struct.
* - The function does not perform
* validation of g parameter.
* @param ltc [in] - Index of LTC.
* - The function validates that
* ltc < g->ltc->ltc_count.
*
* This function handles ltc related ecc interrupts.
* - For each ltc slice \a slice from 0 to g->ltc->slices_per_ltc - 1:
* -# The L2 has SEC-DED protection on its data RAM and parity protection on the
* byte enables RAM.
 * -# See <a href="https://p4viewer.nvidia.com/get//hw/doc/gpu/ampere/ampere/design/Functional_Descriptions/Resiliency/Ampere_gpu_resiliency_ECC.docx">Ampere_gpu_resiliency_ECC.docx</a> for details.
* -# Following PRI registers are used for controlling parity ECC and
* getting the status and information of ECC.
* -# Control:
* -# ECC_CONTROL
* -# Error status and information:
* -# ECC_STATUS
* -# ECC_ADDRESS
* -# ECC_CORRECTED_ERR_COUNT
* -# ECC_UNCORRECTED_ERR_COUNT
* -# Detect and handle ECC PARITY errors and SEC-DED errors.
* SEC errors are reported as DSTG corrected errors and
* DED errors are reported as DSTG uncorrected errors.
* Below are the supported errors:
* -# UNCORRECTED_ERR_RSTG - signals a parity error in RSTG RAMS, for now only CBC RAMS
* -# UNCORRECTED_ERR_TSTG - signals a parity error in TSTG RAMS
* -# UNCORRECTED_ERR_DSTG - signals a parity error in DSTG RAMS, non-data RAMS
* and DED in data RAMS.
* -# CORRECTED_ERR_DSTG - signals an ecc corrected error in DSTG data RAMS (SEC)
* -# Read ltc_ltc0_lts0_intr3_r() register corresponding to the slice adding the offset:
* \f$(ltc * GPU\_LIT\_LTC\_STRIDE) + (slice * GPU\_LIT\_LTS\_STRIDE)\f$
* -# Check if ltc_ltcs_ltss_intr3_ecc_uncorrected_m() or
* ltc_ltcs_ltss_intr3_ecc_corrected_m() is set in
* ltc_ltc0_lts0_intr3_r() register read above.
* If so, handle as below:
* -# Read following registers for the slice:
* -# ecc status register: ltc_ltc0_lts0_l2_cache_ecc_status_r()
* -# ecc address register: ltc_ltc0_lts0_l2_cache_ecc_address_r()
* -# ecc uncorrected count register:
* ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r()
* -# ecc corrected count register:
* ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r()
* -# Calculate counter delta by applying
* ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v()
* to uncorrected count register read above.
* -# Check if the uncorrected count overflow happened by AND'ing ecc status
* read above with ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m().
* -# Reset the counter ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r()
* to zero if the counter delta is non-zero or if there is overflow.
* -# Calculate counter delta by applying
* ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v()
* to corrected count register read above.
* -# Check if the corrected count overflow happened by AND'ing ecc status
* read above with ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m().
* -# Reset the counter ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() to zero if
* the counter delta is non-zero or if there is overflow.
* -# Reset the counter ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() to zero if
* the counter delta is non-zero or if there is overflow.
* -# Write ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f() to
* ltc_ltc0_lts0_l2_cache_ecc_status_r() to reset the entire register.
* -# Add to the uncorrected counter delta
 * BIT32(ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s())
* if there is overflow.
* -# Add to the corrected counter delta
* BIT32(ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s())
* if there is overflow.
* -# Handle ecc errors for subunits (part of the L2 slice detected an error).
* There are three subunits. Pass below parameters to these units:
* -# \a g
* -# \a ltc
* -# \a slice
* -# ecc status read
* -# ecc address read
* -# uncorrected delta
* -# corrected delta (This is passed to only DSTG ECC handling function)
*
* ECC error handling for subunits is given below:
* -# r-stg : the input command queues and the compression bit cache
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m() is
* set in ecc status:
* -# Increment g->ecc.ltc.rstg_ecc_parity_count[\a ltc][\a slice].counter
* with uncorrected counter delta with
* \ref nvgpu_wrapping_add_u32 "nvgpu_wrapping_add_u32".
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_ecc_err
* "nvgpu_report_ecc_err" with following parameters:
* -# \a g
* -# \ref NVGPU_ERR_MODULE_LTC "NVGPU_ERR_MODULE_LTC"
* -# (\a ltc << 8U) | \a slice
* -# \ref GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED
* "GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED"
* -# ecc address read above
* -# g->ecc.ltc.rstg_ecc_parity_count[\a ltc][\a slice].counter
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m() is
* set in ecc status, then it is considered as fatal error as it is not
* expected and call \ref BUG "BUG()".
* -# t-stg : tag lookup and miss fifos
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m() is
* set in ecc status:
* -# Increment g->ecc.ltc.tstg_ecc_parity_count[\a ltc][\a slice].counter
* with uncorrected counter delta with
* \ref nvgpu_wrapping_add_u32 "nvgpu_wrapping_add_u32".
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_ecc_err
* "nvgpu_report_ecc_err" with following parameters:
* -# \a g
* -# \ref NVGPU_ERR_MODULE_LTC "NVGPU_ERR_MODULE_LTC"
* -# (\a ltc << 8U) | \a slice
* -# \ref GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED
* "GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED"
* -# ecc address read above
* -# g->ecc.ltc.tstg_ecc_parity_count[\a ltc][\a slice].counter
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m() is
* set in ecc status, then it is considered as fatal error as it is not
* expected and call \ref BUG "BUG()".
* -# d-stg : sram data banks and write data queues
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m() is
* set in ecc status:
* -# The correctable data ram errors are SEC errors.
* -# Increment g->ecc.ltc.ecc_sec_count[\a ltc][\a slice].counter
* with corrected counter delta with
* \ref nvgpu_wrapping_add_u32 "nvgpu_wrapping_add_u32".
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_ecc_err
* "nvgpu_report_ecc_err" with following parameters:
* -# \a g
* -# \ref NVGPU_ERR_MODULE_LTC "NVGPU_ERR_MODULE_LTC"
* -# (\a ltc << 8U) | \a slice
* -# \ref GPU_LTC_CACHE_DSTG_ECC_CORRECTED
* "GPU_LTC_CACHE_DSTG_ECC_CORRECTED"
* -# ecc address read above.
* -# g->ecc.ltc.ecc_sec_count[\a ltc][\a slice].counter
* -# Flush the L2 cache by calling
* \ref gops_mm_cache.l2_flush "gops_mm_cache.l2_flush".
* -# If it fails then call \ref BUG "BUG()".
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m() is
* set in ecc status:
* -# The uncorrectable data ram errors are reported with the dstg non-data
* ram parity errors in the UNCORRECTED_ERR_DSTG field.
* -# Check if the ECC address corresponds to data ram:
* -# Increment g->ecc.ltc.ecc_ded_count[\a ltc][\a slice].counter
* with uncorrected counter delta with
* \ref nvgpu_wrapping_add_u32 "nvgpu_wrapping_add_u32".
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_ecc_err
* "nvgpu_report_ecc_err" with following parameters:
* -# \a g
* -# \ref NVGPU_ERR_MODULE_LTC "NVGPU_ERR_MODULE_LTC"
* -# (\a ltc << 8U) | \a slice
* -# \ref GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED
* "GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED"
* -# ecc address read above.
* -# g->ecc.ltc.ecc_ded_count[\a ltc][\a slice].counter
 * -# Else if the ECC address corresponds to DSTG BE RAM:
* -# Increment g->ecc.ltc.dstg_be_ecc_parity_count[\a ltc][\a slice].counter
* with uncorrected counter delta with
* \ref nvgpu_wrapping_add_u32 "nvgpu_wrapping_add_u32".
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_ecc_err
* "nvgpu_report_ecc_err" with following parameters:
* -# \a g
* -# \ref NVGPU_ERR_MODULE_LTC "NVGPU_ERR_MODULE_LTC"
* -# (\a ltc << 8U) | \a slice
* -# \ref GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED
* "GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED"
* -# ecc address read above
* -# g->ecc.ltc.dstg_be_ecc_parity_count[\a ltc][\a slice].counter
* -# Else call \ref BUG "BUG()" as this type of ECC error is not supported.
* -# Clear the register ltc_ltc0_lts0_intr3_r() by writing the read value.
* - return 0
*
 * @return 0 in case of success, < 0 in case of failure.
 * @retval -ENODEV if invalid LTC number specified.
 * (NOTE(review): the isr hook below is declared void; these @return/@retval
 * tags look stale and should be confirmed against the implementation.)
*/
void (*isr)(struct gk20a *g, u32 ltc);
@@ -68,13 +232,72 @@ struct gops_ltc_intr {
*/
struct gops_ltc {
/**
* @brief Initialize LTC support.
* @brief Initialize Level Two Cache (LTC) support.
*
* @param g [in] Pointer to GPU driver struct.
* @param g [in] - The GPU driver struct.
* - The function does not perform validation
* of g parameter.
*
* This function reads ltc unit info from GPU h/w and stores
* it in #nvgpu_ltc structure. This function also initializes
* LTC unit ecc counters.
* LTC unit ecc counters. Steps are given below:
*
* - Allocate memory for g->ltc.
* - Initialize LTC floorsweep state by calling the hal
* \ref gops_ltc.init_fs_state "gops_ltc.init_fs_state" with parameter \a g.
* - Initialize g->ltc->max_ltc_count with value returned by calling
* \ref gops_top.get_num_ltcs "g->ops.top.get_num_ltcs" with parameter \a g.
* - Initialize g->ltc->ltc_count with value returned by calling
* \ref gops_priv_ring.enum_ltc "g->ops.priv_ring.enum_ltc" with parameter \a g.
* - Initialize g->ltc->slices_per_ltc with value obtained by applying
* ltc_ltcs_ltss_cbc_param_slices_per_ltc_v() to register value read
* for the register ltc_ltcs_ltss_cbc_param_r().
* - Initialize g->ltc->cacheline_size with value obtained by shifting 512 to left by
* the shift value obtained by applying ltc_ltcs_ltss_cbc_param_cache_line_size_v()
* to register value read for the register ltc_ltcs_ltss_cbc_param_r().
* - The L2 cache (LTC) has SEC-DED ECC protection on its data RAM and parity protection
* for byte enables.
* - Initialize ECC counters for LTCs. On ga10b there are 2 LTC and each LTC has 2 slices.
 * For each LTC slice, the following counters are initialized:
* -# ECC SEC count
* -# ECC DED count
* -# RSTG ECC parity count
* -# TSTG ECC parity count
* -# DSTG BE ECC parity count
* See also \ref gops_ltc.intr.isr "gops_ltc.intr.isr".
* - Enable stalling interrupt for LTC unit.
* -# Enable interrupts at MC level: call #nvgpu_mc_intr_stall_unit_config by passing
* below parameters:
* -# \a g
* -# #MC_INTR_UNIT_LTC
* -# #MC_INTR_ENABLE
* -# Enable interrupts at unit level.
* The L2 interrupts controlled by ltc_ltcs_ltss_intr_r() register are only enabled
* by nvgpu. Various L2 interrupts are:
* -# IDLE_ERROR_CBC - flag if cbc gets a request while slcg clock is disabled
* -# IDLE_ERROR_TSTG - flag if tstg gets a request while slcg clock is disabled
* -# IDLE_ERROR_DSTG - flag if dstg gets a request while slcg clock is disabled
* -# EVICTED_CB - indicates that a CB was demoted. Normally this should not happen
* because the CBs should be flushed during context switch and/or
* invalidated when no longer used.
* -# ILLEGAL_COMPSTAT - indicates an unexpected compression status given the kind.
* -# BLOCKLINEAR_CB - indicates that a valid evict_last entry is accessed by a
* block linear transaction.
* -# ECC_SEC_ERROR - single bit error in data banks. Obsolete.
* -# ECC_DED_ERROR - double bit error in data banks. Obsolete.
* -# DEBUG - unused
* -# ATOMIC_TO_Z - atomic to packing Z or S8.
* -# ILLEGAL_ATOMIC - unsupported atomic op and/or size received.
* -# BLKACTIVITY_ERR - internal error in power sensing block activity monitor
* -# ILLEGAL_COMPSTAT_ACCESS - indicates that some memory access read/wrote into
* the memory space reserved for the compression bit
* carveout (Bug 942161)
* -# ILLEGAL_ROP_ACCESS - zwr or cwr is scrubbed
*
*
* Of these, EVICTED_CB and ILLEGAL_COMPSTAT_ACCESS are disabled to reduce noise
* and increase performance. Rest of the interrupts are kept in hardware
* initialized state.
*
* @return 0 in case of success, < 0 in case of failure.
* @retval -ENOMEM if memory allocation fails for #nvgpu_ltc.
@@ -84,20 +307,38 @@ struct gops_ltc {
/**
* @brief Remove LTC support.
*
* @param g [in] Pointer to GPU driver struct.
* @param g [in] - The GPU driver struct.
* - The function does not perform validation
* of g parameter.
*
* This function will free memory allocated for #nvgpu_ltc structure.
* Steps are given below:
*
* - If g->ltc is NULL return.
* - Free g->ltc.
* - Set g->ltc to NULL.
*/
void (*ltc_remove_support)(struct gk20a *g);
/**
* @brief Returns GPU L2 cache size.
*
* @param g [in] Pointer to GPU driver struct.
* @param g [in] - The GPU driver struct.
* - The function does not perform validation
* of g parameter.
*
* This function returns GPU L2 cache size by reading h/w ltc
* This function returns GPU L2 cache size by reading HW ltc
* config register.
*
* - Read register ltc_ltc0_lts0_tstg_info_1_r().
* - Get slice_size by applying ltc_ltc0_lts0_tstg_info_1_slice_size_in_kb_v()
* to the register value read above.
* - Get slices_per_l2 by applying ltc_ltc0_lts0_tstg_info_1_slices_per_l2_v()
* to the register value read in 1st step.
* - Calculate the size as:
* \f$ g->ltc->ltc\_count * slices\_per\_l2 * (slice\_size * 1024) \f$
* - Return the size.
*
* @return Size of L2 cache in bytes.
*/
u64 (*determine_L2_size_bytes)(struct gk20a *g);
@@ -105,17 +346,67 @@ struct gops_ltc {
/**
* @brief Flush GPU L2 cache.
*
* @param g [in] Pointer to GPU driver struct.
* @param g [in] - The GPU driver struct.
* - The function does not perform validation
* of g parameter.
*
* This function flushes all L2 cache data to main memory by cleaning
* and invaliding all cache sub-units. s/w will poll for completion of
* each ltc unit cache cleaning/invalidation for 5 msec. This 5 msec
* time out is based on following calculations:
* Lowest EMC clock rate will be around 102MHz and thus available
* bandwidth is 64b * 2 * 102MHz = 1.3GB/s. Of that bandwidth, GPU
* will likely get about half, so 650MB/s at worst. Assuming at most
* 1MB of GPU L2 cache, worst case it will take 1MB/650MB/s = 1.5ms.
* and invalidating all cache sub-units. SW will poll for completion
* of each ltc unit cache cleaning/invalidation for 5ms.
*
* The 5ms timeout is based on following calculations:
* Lowest EMC clock rate will be around 204MHz and thus available
* bandwidth is 128B (Cacheline size) * 2 (LTCs) * 204MHz = ~52GB/s.
* Of that bandwidth, GPU will likely get about half, so 26GB/s
* at worst. Assuming at most 1MB of GPU L2 cache, worst case
* it will take 1MB/26GB/s = 38us.
* So 5ms timeout here should be more than enough.
*
* - First stage is to clean the LTCs with the below write:
* \code
* nvgpu_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),
* ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |
* ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f() |
* ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f() |
* ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f() |
* ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f() |
* ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f());
* \endcode
* - This cleans all LTCs.
* - For each LTC, wait for clean to finish for 5ms.
* -# Initialize poll timer with timeout of 5ms by calling
* \ref nvgpu_timeout_init "nvgpu_timeout_init"
* with below parameters:
* -# \a g
* -# local timeout variable
* -# 5
* -# \ref NVGPU_TIMER_CPU_TIMER "NVGPU_TIMER_CPU_TIMER"
* -# do while LTCs are not cleared or timeout is not expired
* -# Read ltc_ltc0_ltss_tstg_cmgmt1_r() corresponding to the LTC.
* The offset is calculated as:
* \f$ltc\_ltc0\_ltss\_tstg\_cmgmt1\_r() + (ltc * GPU\_LIT\_LTC\_STRIDE)\f$
* -# Check if ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f() is cleared.
* - Second stage is to invalidate the LTCs with the below write:
* \code
* nvgpu_writel(g, ltc_ltcs_ltss_tstg_cmgmt0_r(),
* ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f() |
* ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f() |
* ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f() |
* ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f() |
* ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f());
* \endcode
* - This invalidates all LTCs.
* - For each LTC, wait for invalidate to finish for 5ms.
* -# Initialize poll timer with timeout of 5ms by calling
* \ref nvgpu_timeout_init "nvgpu_timeout_init"
* with below parameters:
* -# \a g
* -# local timeout variable
* -# 5
* -# \ref NVGPU_TIMER_CPU_TIMER "NVGPU_TIMER_CPU_TIMER"
* -# do while LTCs are not cleared or timeout is not expired
* -# Read ltc_ltc0_ltss_tstg_cmgmt0_r() corresponding to the LTC.
* -# Check if ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f() is cleared.
*/
void (*flush)(struct gk20a *g);

View File

@@ -55,6 +55,105 @@ struct nvgpu_ltc {
u32 cacheline_size;
};
/**
* @brief Get enumerated ltcs count.
*
* @param g [in] - The GPU driver struct.
* - The function does not perform validation
* of g parameter.
*
* This function returns enumerated number of ltcs after floorsweeping.
* After floorsweeping enumerated ltcs may be less than maximum ltcs available.
*
* - Return value of g->ltc->ltc_count.
*
 * @return Number of enumerated ltcs.
*/
u32 nvgpu_ltc_get_ltc_count(struct gk20a *g);
/**
* @brief Get slices per ltc.
*
* @param g [in] - The GPU driver struct.
* - The function does not perform validation
* of g parameter.
*
* This function returns slices per ltc.
* Each ltc unit is constituted by h/w configured multiple physical slices.
* Clients can use slice size info to make their cache requirement to
* a slice for better bandwidth and/or utilization.
*
* - Return value of g->ltc->slices_per_ltc.
*
* @return Number of slices per ltc.
*/
u32 nvgpu_ltc_get_slices_per_ltc(struct gk20a *g);
/**
* @brief Get cacheline size.
*
* @param g [in] - The GPU driver struct.
* - The function does not perform validation
* of g parameter.
*
* This function returns cacheline size in bytes.
* Cacheline is chunk of memory that can be handled in one go by cache.
* Cacheline size is configured as multiple of 512 bytes in h/w.
*
* - Return value of g->ltc->cacheline_size.
*
* @return Cacheline size in bytes.
*/
u32 nvgpu_ltc_get_cacheline_size(struct gk20a *g);
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_NONE (1U << 0U)
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_64B (1U << 1U)
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_128B (1U << 2U)
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_INVALID (1U << 3U)
/**
* @brief Release all LTC ECC stats counters.
*
* @param g [in] - The GPU driver struct.
* - The function does not perform validation
* of g parameter.
*
* Frees all error counters associated with the LTC unit.
*
* - For each ltc from 0 to \ref nvgpu_ltc_get_ltc_count "nvgpu_ltc_get_ltc_count(g)" - 1:
* - Free dynamically allocated memory for following ECC counters for slices: SEC, DED,
* RSTG parity, TSTG parity, DSTG parity.
* - Free container of the ECC counters for the LTCs.
*
*/
void nvgpu_ltc_ecc_free(struct gk20a *g);
/** @cond DOXYGEN_SHOULD_SKIP_THIS */
/**
* @brief Initialize #nvgpu_ltc structure.
*
* @param g [in] Pointer to GPU driver struct.
*
* This function reads ltc unit info from GPU h/w and stores
* it in #nvgpu_ltc structure. This function allocates memory
* to track the ecc error counts for the LTC unit and enables
* LTC unit interrupts and stalling interrupt at MC level.
*
* @return 0 in case of success, < 0 in case of failure.
* @retval -ENOMEM if memory allocation for #nvgpu_ltc fails.
*/
int nvgpu_init_ltc_support(struct gk20a *g);
/**
* @brief Remove support for LTC.
*
* @param g [in] Pointer to GPU driver struct.
*
* This function will free memory allocated for #nvgpu_ltc structure.
* LTC unit data will be no longer accessible by s/w.
*/
void nvgpu_ltc_remove_support(struct gk20a *g);
/**
 * @brief Allocate and initialize error counters for all ltc-lts instances.
*
@@ -81,79 +180,7 @@ int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \
nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat)
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_NONE (1U << 0U)
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_64B (1U << 1U)
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_128B (1U << 2U)
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_INVALID (1U << 3U)
/**
* @brief Release all LTC ECC stats counters.
*
* @param g [in] The GPU driver struct.
*
* Frees all error counters associated with the LTC unit.
*/
void nvgpu_ltc_ecc_free(struct gk20a *g);
/**
* @brief Initialize #nvgpu_ltc structure.
*
* @param g [in] Pointer to GPU driver struct.
*
* This function reads ltc unit info from GPU h/w and stores
* it in #nvgpu_ltc structure. This function allocates memory
* to track the ecc error counts for the LTC unit and enables
* LTC unit interrupts and stalling interrupt at MC level.
*
* @return 0 in case of success, < 0 in case of failure.
* @retval -ENOMEM if memory allocation for #nvgpu_ltc fails.
*/
int nvgpu_init_ltc_support(struct gk20a *g);
/**
* @brief Remove support for LTC.
*
* @param g [in] Pointer to GPU driver struct.
*
* This function will free memory allocated for #nvgpu_ltc structure.
* LTC unit data will be no longer accessible by s/w.
*/
void nvgpu_ltc_remove_support(struct gk20a *g);
/**
* @brief Get enumerated ltcs count.
*
* @param g [in] Pointer to GPU driver struct.
*
* This function returns enumerated number of ltcs after floorsweeping.
* After floorsweeping enumerated ltcs may be less than maximum ltcs available.
*
* @return Number of enumerated ltc count.
*/
u32 nvgpu_ltc_get_ltc_count(struct gk20a *g);
/**
* @brief Get slices per ltc.
*
* @param g [in] Pointer to GPU driver struct.
*
* This function returns slices per ltc.
* Each ltc unit is constituted by h/w configured multiple physical slices.
* Clients can use slice size info to make their cache requirement to
* a slice for better bandwidth and/or utilization.
*
* @return Number of slices per ltc.
*/
u32 nvgpu_ltc_get_slices_per_ltc(struct gk20a *g);
/**
* @brief Get cacheline size.
*
* @param g [in] Pointer to GPU driver struct.
*
* This function returns cacheline size in bytes.
* Cacheline is chunk of memory that can be handled in one go by cache.
* Cacheline size is configured as multiple of 512 bytes in h/w.
*
* @return Cacheline size in bytes.
*/
u32 nvgpu_ltc_get_cacheline_size(struct gk20a *g);
/** @endcond DOXYGEN_SHOULD_SKIP_THIS */
#if defined(CONFIG_NVGPU_NON_FUSA) || defined(CONFIG_NVGPU_KERNEL_MODE_SUBMIT)
/**