mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: update doxygen for common.ltc unit
Update the documentation as per SWUD feedback for common.ltc unit. JIRA NVGPU-6982 Change-Id: I0a8406791bef2094bcd2804546db46378a269bb3 Signed-off-by: Sagar Kamble <skamble@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2608663 (cherry picked from commit dd1198870b4dbef5d4731fd5d292188c268967b6) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2633960 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com> Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
6a6562cd4d
commit
da3c83cd5e
@@ -51,18 +51,17 @@ static int gm20b_ltc_wait_for_clean(struct gk20a *g)
|
||||
|
||||
/*
|
||||
* Use 5ms - this should be sufficient time to flush the cache.
|
||||
* On tegra, rough EMC BW available for old tegra chips (newer
|
||||
* chips are strictly faster) can be estimated as follows:
|
||||
* On tegra, rough EMC BW available can be estimated as follows:
|
||||
*
|
||||
* Lowest reasonable EMC clock speed will be around 102MHz on
|
||||
* t124 for display enabled boards and generally fixed to max
|
||||
* Lowest reasonable EMC clock speed will be around 204MHz on
|
||||
* t234 for display enabled boards and generally fixed to max
|
||||
* for non-display boards (since they are generally plugged in).
|
||||
*
|
||||
* Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that
|
||||
* Thus, the available BW is 128B * 2 * 204MHz = ~52GB/s. Of that
|
||||
* BW the GPU will likely get about half (display and overhead/
|
||||
* utilization inefficiency eating the rest) so 650MB/s at
|
||||
* utilization inefficiency eating the rest) so 26GB/s at
|
||||
* worst. Assuming at most 1MB of GPU L2 cache (less for most
|
||||
* chips) worst case is we take 1MB/650MB/s = 1.5ms.
|
||||
* chips) worst case is we take 1MB/26GB/s = 38us.
|
||||
*
|
||||
* So 5ms timeout here should be more than sufficient.
|
||||
*/
|
||||
|
||||
@@ -42,10 +42,174 @@ struct gops_ltc_intr {
|
||||
/**
|
||||
* @brief ISR for handling ltc interrupts.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
* @param ltc [in] LTC unit number
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* - The function does not perform
|
||||
* validation of g parameter.
|
||||
* @param ltc [in] - Index of LTC.
|
||||
* - The function validates that
|
||||
* ltc < g->ltc->ltc_count.
|
||||
*
|
||||
* This function handles ltc related ecc interrupts.
|
||||
* - For each ltc slice \a slice from 0 to g->ltc->slices_per_ltc - 1:
|
||||
* -# The L2 has SEC-DED protection on its data RAM and parity protection on the
|
||||
* byte enables RAM.
|
||||
* -# See <a href="https://p4viewer.nvidia.com/get//hw/doc/gpu/ampere/ampere/design/Functional_Descriptions/Resiliency/Ampere_gpu_resiliency_ECC.docx">Ampere_gpu_resiliency_ECC.docx</a> for details.
|
||||
* -# Following PRI registers are used for controlling parity ECC and
|
||||
* getting the status and information of ECC.
|
||||
* -# Control:
|
||||
* -# ECC_CONTROL
|
||||
* -# Error status and information:
|
||||
* -# ECC_STATUS
|
||||
* -# ECC_ADDRESS
|
||||
* -# ECC_CORRECTED_ERR_COUNT
|
||||
* -# ECC_UNCORRECTED_ERR_COUNT
|
||||
* -# Detect and handle ECC PARITY errors and SEC-DED errors.
|
||||
* SEC errors are reported as DSTG corrected errors and
|
||||
* DED errors are reported as DSTG uncorrected errors.
|
||||
* Below are the supported errors:
|
||||
* -# UNCORRECTED_ERR_RSTG - signals a parity error in RSTG RAMS, for now only CBC RAMS
|
||||
* -# UNCORRECTED_ERR_TSTG - signals a parity error in TSTG RAMS
|
||||
* -# UNCORRECTED_ERR_DSTG - signals a parity error in DSTG RAMS, non-data RAMS
|
||||
* and DED in data RAMS.
|
||||
* -# CORRECTED_ERR_DSTG - signals an ecc corrected error in DSTG data RAMS (SEC)
|
||||
* -# Read ltc_ltc0_lts0_intr3_r() register corresponding to the slice adding the offset:
|
||||
* \f$(ltc * GPU\_LIT\_LTC\_STRIDE) + (slice * GPU\_LIT\_LTS\_STRIDE)\f$
|
||||
* -# Check if ltc_ltcs_ltss_intr3_ecc_uncorrected_m() or
|
||||
* ltc_ltcs_ltss_intr3_ecc_corrected_m() is set in
|
||||
* ltc_ltc0_lts0_intr3_r() register read above.
|
||||
* If so, handle as below:
|
||||
* -# Read following registers for the slice:
|
||||
* -# ecc status register: ltc_ltc0_lts0_l2_cache_ecc_status_r()
|
||||
* -# ecc address register: ltc_ltc0_lts0_l2_cache_ecc_address_r()
|
||||
* -# ecc uncorrected count register:
|
||||
* ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r()
|
||||
* -# ecc corrected count register:
|
||||
* ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r()
|
||||
* -# Calculate counter delta by applying
|
||||
* ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v()
|
||||
* to uncorrected count register read above.
|
||||
* -# Check if the uncorrected count overflow happened by AND'ing ecc status
|
||||
* read above with ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m().
|
||||
* -# Reset the counter ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r()
|
||||
* to zero if the counter delta is non-zero or if there is overflow.
|
||||
* -# Calculate counter delta by applying
|
||||
* ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v()
|
||||
* to corrected count register read above.
|
||||
* -# Check if the corrected count overflow happened by AND'ing ecc status
|
||||
* read above with ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m().
|
||||
* -# Reset the counter ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() to zero if
|
||||
* the counter delta is non-zero or if there is overflow.
|
||||
* -# Reset the counter ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() to zero if
|
||||
* the counter delta is non-zero or if there is overflow.
|
||||
* -# Write ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f() to
|
||||
* ltc_ltc0_lts0_l2_cache_ecc_status_r() to reset the entire register.
|
||||
* -# Add to the uncorrected counter delta
|
||||
* BIT32(ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s())
|
||||
* if there is overflow.
|
||||
* -# Add to the corrected counter delta
|
||||
* BIT32(ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s())
|
||||
* if there is overflow.
|
||||
* -# Handle ecc errors for subunits (part of the L2 slice detected an error).
|
||||
* There are three subunits. Pass below parameters to these units:
|
||||
* -# \a g
|
||||
* -# \a ltc
|
||||
* -# \a slice
|
||||
* -# ecc status read
|
||||
* -# ecc address read
|
||||
* -# uncorrected delta
|
||||
* -# corrected delta (This is passed to only DSTG ECC handling function)
|
||||
*
|
||||
* ECC error handling for subunits is given below:
|
||||
* -# r-stg : the input command queues and the compression bit cache
|
||||
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m() is
|
||||
* set in ecc status:
|
||||
* -# Increment g->ecc.ltc.rstg_ecc_parity_count[\a ltc][\a slice].counter
|
||||
* with uncorrected counter delta with
|
||||
* \ref nvgpu_wrapping_add_u32 "nvgpu_wrapping_add_u32".
|
||||
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_ecc_err
|
||||
* "nvgpu_report_ecc_err" with following parameters:
|
||||
* -# \a g
|
||||
* -# \ref NVGPU_ERR_MODULE_LTC "NVGPU_ERR_MODULE_LTC"
|
||||
* -# (\a ltc << 8U) | \a slice
|
||||
* -# \ref GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED
|
||||
* "GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED"
|
||||
* -# ecc address read above
|
||||
* -# g->ecc.ltc.rstg_ecc_parity_count[\a ltc][\a slice].counter
|
||||
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m() is
|
||||
* set in ecc status, then it is considered as fatal error as it is not
|
||||
* expected and call \ref BUG "BUG()".
|
||||
* -# t-stg : tag lookup and miss fifos
|
||||
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m() is
|
||||
* set in ecc status:
|
||||
* -# Increment g->ecc.ltc.tstg_ecc_parity_count[\a ltc][\a slice].counter
|
||||
* with uncorrected counter delta with
|
||||
* \ref nvgpu_wrapping_add_u32 "nvgpu_wrapping_add_u32".
|
||||
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_ecc_err
|
||||
* "nvgpu_report_ecc_err" with following parameters:
|
||||
* -# \a g
|
||||
* -# \ref NVGPU_ERR_MODULE_LTC "NVGPU_ERR_MODULE_LTC"
|
||||
* -# (\a ltc << 8U) | \a slice
|
||||
* -# \ref GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED
|
||||
* "GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED"
|
||||
* -# ecc address read above
|
||||
* -# g->ecc.ltc.tstg_ecc_parity_count[\a ltc][\a slice].counter
|
||||
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m() is
|
||||
* set in ecc status, then it is considered as fatal error as it is not
|
||||
* expected and call \ref BUG "BUG()".
|
||||
* -# d-stg : sram data banks and write data queues
|
||||
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m() is
|
||||
* set in ecc status:
|
||||
* -# The correctable data ram errors are SEC errors.
|
||||
* -# Increment g->ecc.ltc.ecc_sec_count[\a ltc][\a slice].counter
|
||||
* with corrected counter delta with
|
||||
* \ref nvgpu_wrapping_add_u32 "nvgpu_wrapping_add_u32".
|
||||
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_ecc_err
|
||||
* "nvgpu_report_ecc_err" with following parameters:
|
||||
* -# \a g
|
||||
* -# \ref NVGPU_ERR_MODULE_LTC "NVGPU_ERR_MODULE_LTC"
|
||||
* -# (\a ltc << 8U) | \a slice
|
||||
* -# \ref GPU_LTC_CACHE_DSTG_ECC_CORRECTED
|
||||
* "GPU_LTC_CACHE_DSTG_ECC_CORRECTED"
|
||||
* -# ecc address read above.
|
||||
* -# g->ecc.ltc.ecc_sec_count[\a ltc][\a slice].counter
|
||||
* -# Flush the L2 cache by calling
|
||||
* \ref gops_mm_cache.l2_flush "gops_mm_cache.l2_flush".
|
||||
* -# If it fails then call \ref BUG "BUG()".
|
||||
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m() is
|
||||
* set in ecc status:
|
||||
* -# The uncorrectable data ram errors are reported with the dstg non-data
|
||||
* ram parity errors in the UNCORRECTED_ERR_DSTG field.
|
||||
* -# Check if the ECC address corresponds to data ram:
|
||||
* -# Increment g->ecc.ltc.ecc_ded_count[\a ltc][\a slice].counter
|
||||
* with uncorrected counter delta with
|
||||
* \ref nvgpu_wrapping_add_u32 "nvgpu_wrapping_add_u32".
|
||||
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_ecc_err
|
||||
* "nvgpu_report_ecc_err" with following parameters:
|
||||
* -# \a g
|
||||
* -# \ref NVGPU_ERR_MODULE_LTC "NVGPU_ERR_MODULE_LTC"
|
||||
* -# (\a ltc << 8U) | \a slice
|
||||
* -# \ref GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED
|
||||
* "GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED"
|
||||
* -# ecc address read above.
|
||||
* -# g->ecc.ltc.ecc_ded_count[\a ltc][\a slice].counter
|
||||
* -# Else if the ECC address corresponds to DSTG BE RAM:
|
||||
* -# Increment g->ecc.ltc.dstg_be_ecc_parity_count[\a ltc][\a slice].counter
|
||||
* with uncorrected counter delta with
|
||||
* \ref nvgpu_wrapping_add_u32 "nvgpu_wrapping_add_u32".
|
||||
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_ecc_err
|
||||
* "nvgpu_report_ecc_err" with following parameters:
|
||||
* -# \a g
|
||||
* -# \ref NVGPU_ERR_MODULE_LTC "NVGPU_ERR_MODULE_LTC"
|
||||
* -# (\a ltc << 8U) | \a slice
|
||||
* -# \ref GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED
|
||||
* "GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED"
|
||||
* -# ecc address read above
|
||||
* -# g->ecc.ltc.dstg_be_ecc_parity_count[\a ltc][\a slice].counter
|
||||
* -# Else call \ref BUG "BUG()" as this type of ECC error is not supported.
|
||||
* -# Clear the register ltc_ltc0_lts0_intr3_r() by writing the read value.
|
||||
* - return 0
|
||||
*
|
||||
* @return 0 in case of success, < 0 in case of failure.
|
||||
* @retval -ENODEV if invalid LTC number specified.
|
||||
*/
|
||||
void (*isr)(struct gk20a *g, u32 ltc);
|
||||
|
||||
@@ -68,13 +232,72 @@ struct gops_ltc_intr {
|
||||
*/
|
||||
struct gops_ltc {
|
||||
/**
|
||||
* @brief Initialize LTC support.
|
||||
* @brief Initialize Level Two Cache (LTC) support.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* - The function does not perform validation
|
||||
* of g parameter.
|
||||
*
|
||||
* This function reads ltc unit info from GPU h/w and stores
|
||||
* it in #nvgpu_ltc structure. This function also initializes
|
||||
* LTC unit ecc counters.
|
||||
* LTC unit ecc counters. Steps are given below:
|
||||
*
|
||||
* - Allocate memory for g->ltc.
|
||||
* - Initialize LTC floorsweep state by calling the hal
|
||||
* \ref gops_ltc.init_fs_state "gops_ltc.init_fs_state" with parameter \a g.
|
||||
* - Initialize g->ltc->max_ltc_count with value returned by calling
|
||||
* \ref gops_top.get_num_ltcs "g->ops.top.get_num_ltcs" with parameter \a g.
|
||||
* - Initialize g->ltc->ltc_count with value returned by calling
|
||||
* \ref gops_priv_ring.enum_ltc "g->ops.priv_ring.enum_ltc" with parameter \a g.
|
||||
* - Initialize g->ltc->slices_per_ltc with value obtained by applying
|
||||
* ltc_ltcs_ltss_cbc_param_slices_per_ltc_v() to register value read
|
||||
* for the register ltc_ltcs_ltss_cbc_param_r().
|
||||
* - Initialize g->ltc->cacheline_size with value obtained by shifting 512 to left by
|
||||
* the shift value obtained by applying ltc_ltcs_ltss_cbc_param_cache_line_size_v()
|
||||
* to register value read for the register ltc_ltcs_ltss_cbc_param_r().
|
||||
* - The L2 cache (LTC) has SEC-DED ECC protection on its data RAM and parity protection
|
||||
* for byte enables.
|
||||
* - Initialize ECC counters for LTCs. On ga10b there are 2 LTC and each LTC has 2 slices.
|
||||
* For each following counters are initialized:
|
||||
* -# ECC SEC count
|
||||
* -# ECC DED count
|
||||
* -# RSTG ECC parity count
|
||||
* -# TSTG ECC parity count
|
||||
* -# DSTG BE ECC parity count
|
||||
* See also \ref gops_ltc.intr.isr "gops_ltc.intr.isr".
|
||||
* - Enable stalling interrupt for LTC unit.
|
||||
* -# Enable interrupts at MC level: call #nvgpu_mc_intr_stall_unit_config by passing
|
||||
* below parameters:
|
||||
* -# \a g
|
||||
* -# #MC_INTR_UNIT_LTC
|
||||
* -# #MC_INTR_ENABLE
|
||||
* -# Enable interrupts at unit level.
|
||||
* The L2 interrupts controlled by ltc_ltcs_ltss_intr_r() register are only enabled
|
||||
* by nvgpu. Various L2 interrupts are:
|
||||
* -# IDLE_ERROR_CBC - flag if cbc gets a request while slcg clock is disabled
|
||||
* -# IDLE_ERROR_TSTG - flag if tstg gets a request while slcg clock is disabled
|
||||
* -# IDLE_ERROR_DSTG - flag if dstg gets a request while slcg clock is disabled
|
||||
* -# EVICTED_CB - indicates that a CB was demoted. Normally this should not happen
|
||||
* because the CBs should be flushed during context switch and/or
|
||||
* invalidated when no longer used.
|
||||
* -# ILLEGAL_COMPSTAT - indicates an unexpected compression status given the kind.
|
||||
* -# BLOCKLINEAR_CB - indicates that a valid evict_last entry is accessed by a
|
||||
* block linear transaction.
|
||||
* -# ECC_SEC_ERROR - single bit error in data banks. Obsolete.
|
||||
* -# ECC_DED_ERROR - double bit error in data banks. Obsolete.
|
||||
* -# DEBUG - unused
|
||||
* -# ATOMIC_TO_Z - atomic to packing Z or S8.
|
||||
* -# ILLEGAL_ATOMIC - unsupported atomic op and/or size received.
|
||||
* -# BLKACTIVITY_ERR - internal error in power sensing block activity monitor
|
||||
* -# ILLEGAL_COMPSTAT_ACCESS - indicates that some memory access read/wrote into
|
||||
* the memory space reserved for the compression bit
|
||||
* carveout (Bug 942161)
|
||||
* -# ILLEGAL_ROP_ACCESS - zwr or cwr is scrubbed
|
||||
*
|
||||
*
|
||||
* Of these, EVICTED_CB and ILLEGAL_COMPSTAT_ACCESS are disabled to reduce noise
|
||||
* and increase performance. Rest of the interrupts are kept in hardware
|
||||
* initialized state.
|
||||
*
|
||||
* @return 0 in case of success, < 0 in case of failure.
|
||||
* @retval -ENOMEM if memory allocation fails for #nvgpu_ltc.
|
||||
@@ -84,20 +307,38 @@ struct gops_ltc {
|
||||
/**
|
||||
* @brief Remove LTC support.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* - The function does not perform validation
|
||||
* of g parameter.
|
||||
*
|
||||
* This function will free memory allocated for #nvgpu_ltc structure.
|
||||
* Steps are given below:
|
||||
*
|
||||
* - If g->ltc is NULL return.
|
||||
* - Free g->ltc.
|
||||
* - Set g->ltc to NULL.
|
||||
*/
|
||||
void (*ltc_remove_support)(struct gk20a *g);
|
||||
|
||||
/**
|
||||
* @brief Returns GPU L2 cache size.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* - The function does not perform validation
|
||||
* of g parameter.
|
||||
*
|
||||
* This function returns GPU L2 cache size by reading h/w ltc
|
||||
* This function returns GPU L2 cache size by reading HW ltc
|
||||
* config register.
|
||||
*
|
||||
* - Read register ltc_ltc0_lts0_tstg_info_1_r().
|
||||
* - Get slice_size by applying ltc_ltc0_lts0_tstg_info_1_slice_size_in_kb_v()
|
||||
* to the register value read above.
|
||||
* - Get slices_per_l2 by applying ltc_ltc0_lts0_tstg_info_1_slices_per_l2_v()
|
||||
* to the register value read in 1st step.
|
||||
* - Calculate the size as:
|
||||
* \f$ g->ltc->ltc\_count * slices\_per\_l2 * (slice\_size * 1024) \f$
|
||||
* - Return the size.
|
||||
*
|
||||
* @return Size of L2 cache in bytes.
|
||||
*/
|
||||
u64 (*determine_L2_size_bytes)(struct gk20a *g);
|
||||
@@ -105,17 +346,67 @@ struct gops_ltc {
|
||||
/**
|
||||
* @brief Flush GPU L2 cache.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* - The function does not perform validation
|
||||
* of g parameter.
|
||||
*
|
||||
* This function flushes all L2 cache data to main memory by cleaning
|
||||
* and invaliding all cache sub-units. s/w will poll for completion of
|
||||
* each ltc unit cache cleaning/invalidation for 5 msec. This 5 msec
|
||||
* time out is based on following calculations:
|
||||
* Lowest EMC clock rate will be around 102MHz and thus available
|
||||
* bandwidth is 64b * 2 * 102MHz = 1.3GB/s. Of that bandwidth, GPU
|
||||
* will likely get about half, so 650MB/s at worst. Assuming at most
|
||||
* 1MB of GPU L2 cache, worst case it will take 1MB/650MB/s = 1.5ms.
|
||||
* and invalidating all cache sub-units. SW will poll for completion
|
||||
* of each ltc unit cache cleaning/invalidation for 5ms.
|
||||
*
|
||||
* The 5ms timeout is based on following calculations:
|
||||
* Lowest EMC clock rate will be around 204MHz and thus available
|
||||
* bandwidth is 128B (Cacheline size) * 2 (LTCs) * 204MHz = ~52GB/s.
|
||||
* Of that bandwidth, GPU will likely get about half, so 26GB/s
|
||||
* at worst. Assuming at most 1MB of GPU L2 cache, worst case
|
||||
* it will take 1MB/26GB/s = 38us.
|
||||
* So 5ms timeout here should be more than enough.
|
||||
*
|
||||
* - First stage is to clean the LTCs with the below write:
|
||||
* \code
|
||||
* nvgpu_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),
|
||||
* ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |
|
||||
* ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f() |
|
||||
* ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f() |
|
||||
* ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f() |
|
||||
* ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f() |
|
||||
* ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f());
|
||||
* \endcode
|
||||
* - This cleans all LTCs.
|
||||
* - For each LTC, wait for clean to finish for 5ms.
|
||||
* -# Initialize poll timer with timeout of 5ms by calling
|
||||
* \ref nvgpu_timeout_init "nvgpu_timeout_init"
|
||||
* with below parameters:
|
||||
* -# \a g
|
||||
* -# local timeout variable
|
||||
* -# 5
|
||||
* -# \ref NVGPU_TIMER_CPU_TIMER "NVGPU_TIMER_CPU_TIMER"
|
||||
* -# do while LTCs are not cleared and timeout is not expired
|
||||
* -# Read ltc_ltc0_ltss_tstg_cmgmt1_r() corresponding to the LTC.
|
||||
* The offset is calculated as:
|
||||
* \f$ltc\_ltc0\_ltss\_tstg\_cmgmt1\_r() + (ltc * GPU\_LIT\_LTC\_STRIDE)\f$
|
||||
* -# Check if ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f() is cleared.
|
||||
* - Second stage is to invalidate the LTCs with the below write:
|
||||
* \code
|
||||
* nvgpu_writel(g, ltc_ltcs_ltss_tstg_cmgmt0_r(),
|
||||
* ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f() |
|
||||
* ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f() |
|
||||
* ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f() |
|
||||
* ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f() |
|
||||
* ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f());
|
||||
* \endcode
|
||||
* - This invalidates all LTCs.
|
||||
* - For each LTC, wait for invalidate to finish for 5ms.
|
||||
* -# Initialize poll timer with timeout of 5ms by calling
|
||||
* \ref nvgpu_timeout_init "nvgpu_timeout_init"
|
||||
* with below parameters:
|
||||
* -# \a g
|
||||
* -# local timeout variable
|
||||
* -# 5
|
||||
* -# \ref NVGPU_TIMER_CPU_TIMER "NVGPU_TIMER_CPU_TIMER"
|
||||
* -# do while LTCs are not cleared and timeout is not expired
|
||||
* -# Read ltc_ltc0_ltss_tstg_cmgmt0_r() corresponding to the LTC.
|
||||
* -# Check if ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f() is cleared.
|
||||
*/
|
||||
void (*flush)(struct gk20a *g);
|
||||
|
||||
|
||||
@@ -55,6 +55,105 @@ struct nvgpu_ltc {
|
||||
u32 cacheline_size;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Get enumerated ltcs count.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* - The function does not perform validation
|
||||
* of g parameter.
|
||||
*
|
||||
* This function returns enumerated number of ltcs after floorsweeping.
|
||||
* After floorsweeping enumerated ltcs may be less than maximum ltcs available.
|
||||
*
|
||||
* - Return value of g->ltc->ltc_count.
|
||||
*
|
||||
* @return Number of enumerated ltcs.
|
||||
*/
|
||||
u32 nvgpu_ltc_get_ltc_count(struct gk20a *g);
|
||||
|
||||
/**
|
||||
* @brief Get slices per ltc.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* - The function does not perform validation
|
||||
* of g parameter.
|
||||
*
|
||||
* This function returns slices per ltc.
|
||||
* Each ltc unit is constituted by h/w configured multiple physical slices.
|
||||
* Clients can use slice size info to make their cache requirement to
|
||||
* a slice for better bandwidth and/or utilization.
|
||||
*
|
||||
* - Return value of g->ltc->slices_per_ltc.
|
||||
*
|
||||
* @return Number of slices per ltc.
|
||||
*/
|
||||
u32 nvgpu_ltc_get_slices_per_ltc(struct gk20a *g);
|
||||
|
||||
/**
|
||||
* @brief Get cacheline size.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* - The function does not perform validation
|
||||
* of g parameter.
|
||||
*
|
||||
* This function returns cacheline size in bytes.
|
||||
* A cacheline is a chunk of memory that can be handled in one go by the cache.
|
||||
* Cacheline size is configured as multiple of 512 bytes in h/w.
|
||||
*
|
||||
* - Return value of g->ltc->cacheline_size.
|
||||
*
|
||||
* @return Cacheline size in bytes.
|
||||
*/
|
||||
u32 nvgpu_ltc_get_cacheline_size(struct gk20a *g);
|
||||
|
||||
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_NONE (1U << 0U)
|
||||
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_64B (1U << 1U)
|
||||
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_128B (1U << 2U)
|
||||
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_INVALID (1U << 3U)
|
||||
|
||||
/**
|
||||
* @brief Release all LTC ECC stats counters.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* - The function does not perform validation
|
||||
* of g parameter.
|
||||
*
|
||||
* Frees all error counters associated with the LTC unit.
|
||||
*
|
||||
* - For each ltc from 0 to \ref nvgpu_ltc_get_ltc_count "nvgpu_ltc_get_ltc_count(g)" - 1:
|
||||
* - Free dynamically allocated memory for following ECC counters for slices: SEC, DED,
|
||||
* RSTG parity, TSTG parity, DSTG parity.
|
||||
* - Free container of the ECC counters for the LTCs.
|
||||
*
|
||||
*/
|
||||
void nvgpu_ltc_ecc_free(struct gk20a *g);
|
||||
|
||||
/** @cond DOXYGEN_SHOULD_SKIP_THIS */
|
||||
|
||||
/**
|
||||
* @brief Initialize #nvgpu_ltc structure.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
*
|
||||
* This function reads ltc unit info from GPU h/w and stores
|
||||
* it in #nvgpu_ltc structure. This function allocates memory
|
||||
* to track the ecc error counts for the LTC unit and enables
|
||||
* LTC unit interrupts and stalling interrupt at MC level.
|
||||
*
|
||||
* @return 0 in case of success, < 0 in case of failure.
|
||||
* @retval -ENOMEM if memory allocation for #nvgpu_ltc fails.
|
||||
*/
|
||||
int nvgpu_init_ltc_support(struct gk20a *g);
|
||||
/**
|
||||
* @brief Remove support for LTC.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
*
|
||||
* This function will free memory allocated for #nvgpu_ltc structure.
|
||||
* LTC unit data will be no longer accessible by s/w.
|
||||
*/
|
||||
void nvgpu_ltc_remove_support(struct gk20a *g);
|
||||
|
||||
/**
|
||||
* @brief Allocate and initialize error counters for all ltc-lts instances.
|
||||
*
|
||||
@@ -81,79 +180,7 @@ int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
|
||||
#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \
|
||||
nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat)
|
||||
|
||||
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_NONE (1U << 0U)
|
||||
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_64B (1U << 1U)
|
||||
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_128B (1U << 2U)
|
||||
#define NVGPU_L2_SECTOR_PROMOTE_FLAG_INVALID (1U << 3U)
|
||||
|
||||
/**
|
||||
* @brief Release all LTC ECC stats counters.
|
||||
*
|
||||
* @param g [in] The GPU driver struct.
|
||||
*
|
||||
* Frees all error counters associated with the LTC unit.
|
||||
*/
|
||||
void nvgpu_ltc_ecc_free(struct gk20a *g);
|
||||
|
||||
/**
|
||||
* @brief Initialize #nvgpu_ltc structure.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
*
|
||||
* This function reads ltc unit info from GPU h/w and stores
|
||||
* it in #nvgpu_ltc structure. This function allocates memory
|
||||
* to track the ecc error counts for the LTC unit and enables
|
||||
* LTC unit interrupts and stalling interrupt at MC level.
|
||||
*
|
||||
* @return 0 in case of success, < 0 in case of failure.
|
||||
* @retval -ENOMEM if memory allocation for #nvgpu_ltc fails.
|
||||
*/
|
||||
int nvgpu_init_ltc_support(struct gk20a *g);
|
||||
/**
|
||||
* @brief Remove support for LTC.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
*
|
||||
* This function will free memory allocated for #nvgpu_ltc structure.
|
||||
* LTC unit data will be no longer accessible by s/w.
|
||||
*/
|
||||
void nvgpu_ltc_remove_support(struct gk20a *g);
|
||||
/**
|
||||
* @brief Get enumerated ltcs count.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
*
|
||||
* This function returns enumerated number of ltcs after floorsweeping.
|
||||
* After floorsweeping enumerated ltcs may be less than maximum ltcs available.
|
||||
*
|
||||
* @return Number of enumerated ltc count.
|
||||
*/
|
||||
u32 nvgpu_ltc_get_ltc_count(struct gk20a *g);
|
||||
/**
|
||||
* @brief Get slices per ltc.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
*
|
||||
* This function returns slices per ltc.
|
||||
* Each ltc unit is constituted by h/w configured multiple physical slices.
|
||||
* Clients can use slice size info to make their cache requirement to
|
||||
* a slice for better bandwidth and/or utilization.
|
||||
*
|
||||
* @return Number of slices per ltc.
|
||||
*/
|
||||
u32 nvgpu_ltc_get_slices_per_ltc(struct gk20a *g);
|
||||
/**
|
||||
* @brief Get cacheline size.
|
||||
*
|
||||
* @param g [in] Pointer to GPU driver struct.
|
||||
*
|
||||
* This function returns cacheline size in bytes.
|
||||
* Cacheline is chunk of memory that can be handled in one go by cache.
|
||||
* Cacheline size is configured as multiple of 512 bytes in h/w.
|
||||
*
|
||||
* @return Cacheline size in bytes.
|
||||
*/
|
||||
u32 nvgpu_ltc_get_cacheline_size(struct gk20a *g);
|
||||
/** @endcond DOXYGEN_SHOULD_SKIP_THIS */
|
||||
|
||||
#if defined(CONFIG_NVGPU_NON_FUSA) || defined(CONFIG_NVGPU_KERNEL_MODE_SUBMIT)
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user