gpu: nvgpu: add recovery capability

Add NVGPU_SUPPORT_RECOVERY and NVGPU_GPU_FLAGS_SUPPORT_RECOVERY
to indicate whether recovery is supported.

When recovery is supported, an engine reset is performed in order to
recover from an uncorrectable error. When it is not supported, the
driver enters the SW quiesce state instead.

Jira NVGPU-3896

Change-Id: Iea809c13a844641e31ce6306fbd1630ef622bfe9
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2175447
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Philip Elcan <pelcan@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Thomas Fleury
2019-08-12 12:51:00 -04:00
committed by Alex Waterman
parent a9f8b321b1
commit 9f0dff4a03
4 changed files with 12 additions and 2 deletions

View File

@@ -249,7 +249,10 @@ int nvgpu_finalize_poweron(struct gk20a *g)
g->power_on = true;
#ifndef CONFIG_NVGPU_RECOVERY
#ifdef CONFIG_NVGPU_RECOVERY
nvgpu_set_enabled(g, NVGPU_SUPPORT_RECOVERY, true);
#else
nvgpu_set_enabled(g, NVGPU_SUPPORT_RECOVERY, false);
err = nvgpu_sw_quiesce_init_support(g);
if (err != 0) {
nvgpu_err(g, "failed to init sw-quiesce support");

View File

@@ -207,10 +207,13 @@ struct gk20a;
/* DGPU Thermal Alert */
#define NVGPU_SUPPORT_DGPU_THERMAL_ALERT 79U
/* Recovery support */
#define NVGPU_SUPPORT_RECOVERY 80U
/*
* Must be greater than the largest bit offset in the above list.
*/
#define NVGPU_MAX_ENABLED_BITS 80U
#define NVGPU_MAX_ENABLED_BITS 81U
/**
* nvgpu_is_enabled - Check if the passed flag is enabled.

View File

@@ -245,6 +245,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
NVGPU_DRIVER_REDUCED_PROFILE},
{NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE,
NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE},
{NVGPU_GPU_FLAGS_SUPPORT_RECOVERY,
NVGPU_SUPPORT_RECOVERY}
};
static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)

View File

@@ -170,6 +170,8 @@ struct nvgpu_gpu_zbc_query_table_args {
#define NVGPU_GPU_FLAGS_DRIVER_REDUCED_PROFILE (1ULL << 31)
/* Set MMU debug mode is available */
#define NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE (1ULL << 32)
/* Recovery is enabled */
#define NVGPU_GPU_FLAGS_SUPPORT_RECOVERY (1ULL << 33)
/* SM LRF ECC is enabled */
#define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60)
/* SM SHM ECC is enabled */