From 9f0dff4a03e1ef4804b2705643a0c5c50d69e864 Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Mon, 12 Aug 2019 12:51:00 -0400 Subject: [PATCH] gpu: nvgpu: add recovery capability Add NVGPU_SUPPORT_RECOVERY and NVGPU_GPU_FLAGS_SUPPORT_RECOVERY, to indicate if recovery is supported. When true, an engine reset is performed in order to recover from an uncorrectable error. When false, the driver enters SW quiesce state. Jira NVGPU-3896 Change-Id: Iea809c13a844641e31ce6306fbd1630ef622bfe9 Signed-off-by: Thomas Fleury Reviewed-on: https://git-master.nvidia.com/r/2175447 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-misra Reviewed-by: Philip Elcan Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/init/nvgpu_init.c | 5 ++++- drivers/gpu/nvgpu/include/nvgpu/enabled.h | 5 ++++- drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 2 ++ include/uapi/linux/nvgpu.h | 2 ++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c index 587a3f311..23c236c89 100644 --- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -249,7 +249,10 @@ int nvgpu_finalize_poweron(struct gk20a *g) g->power_on = true; -#ifndef CONFIG_NVGPU_RECOVERY +#ifdef CONFIG_NVGPU_RECOVERY + nvgpu_set_enabled(g, NVGPU_SUPPORT_RECOVERY, true); +#else + nvgpu_set_enabled(g, NVGPU_SUPPORT_RECOVERY, false); err = nvgpu_sw_quiesce_init_support(g); if (err != 0) { nvgpu_err(g, "failed to init sw-quiesce support"); diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index befef3c78..220225406 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -207,10 +207,13 @@ struct gk20a; /* DGPU Thermal Alert */ #define NVGPU_SUPPORT_DGPU_THERMAL_ALERT 
79U +/* Recovery support */ +#define NVGPU_SUPPORT_RECOVERY 80U + /* * Must be greater than the largest bit offset in the above list. */ -#define NVGPU_MAX_ENABLED_BITS 80U +#define NVGPU_MAX_ENABLED_BITS 81U /** * nvgpu_is_enabled - Check if the passed flag is enabled. diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 53c0e7e0f..bb531e972 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -245,6 +245,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = { NVGPU_DRIVER_REDUCED_PROFILE}, {NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE}, + {NVGPU_GPU_FLAGS_SUPPORT_RECOVERY, + NVGPU_SUPPORT_RECOVERY} }; static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index f5f4c6280..d831bd8d7 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -170,6 +170,8 @@ struct nvgpu_gpu_zbc_query_table_args { #define NVGPU_GPU_FLAGS_DRIVER_REDUCED_PROFILE (1ULL << 31) /* Set MMU debug mode is available */ #define NVGPU_GPU_FLAGS_SUPPORT_SET_CTX_MMU_DEBUG_MODE (1ULL << 32) +/* Recovery is enabled */ +#define NVGPU_GPU_FLAGS_SUPPORT_RECOVERY (1ULL << 33) /* SM LRF ECC is enabled */ #define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60) /* SM SHM ECC is enabled */