gpu: nvgpu: handle chip specific erratas

Currently, there are a few chip-specific errata present in the nvgpu code.
For better traceability of the errata and their corresponding fixes,
introduce flags that indicate which errata exist on a chip. These flags
decide whether the corresponding workaround is applied to the chip(s).

This patch introduces the following functions to handle errata flags:
- nvgpu_init_errata_flags
- nvgpu_set_errata
- nvgpu_is_errata_present
- nvgpu_print_errata_flags
- nvgpu_free_errata_flags

nvgpu_print_errata_flags prints the following details for each errata present in the chip:
1. errata flag name
2. chip where the errata was first discovered
3. short description of the errata

Flags corresponding to erratas present in a chip are set during chip hal
init sequence.

JIRA NVGPU-6510

Change-Id: Id5a8fb627222ac0a585aba071af052950f4de965
Signed-off-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2498095
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Vedashree Vidwans
2021-04-13 21:55:47 -07:00
committed by mobile promotions
parent 6222ebeaea
commit aba26fa082
28 changed files with 410 additions and 38 deletions

View File

@@ -237,15 +237,17 @@ sim:
utils:
safe: yes
owner: Terje B
owner: Alex W
sources: [ include/nvgpu/utils.h,
include/nvgpu/worker.h,
include/nvgpu/rbtree.h,
include/nvgpu/enabled.h,
include/nvgpu/errata.h,
common/utils/string.c,
common/utils/worker.c,
common/utils/rbtree.c,
common/utils/enabled.c ]
common/utils/enabled.c,
common/utils/errata.c ]
##
## Common elements.

View File

@@ -200,6 +200,7 @@ nvgpu-y += \
nvgpu-y += \
common/device.o \
common/utils/enabled.o \
common/utils/errata.o \
common/utils/rbtree.o \
common/utils/string.o \
common/utils/worker.o \

View File

@@ -92,6 +92,7 @@ endif
srcs += common/device.c \
common/utils/enabled.c \
common/utils/errata.c \
common/utils/rbtree.c \
common/utils/string.c \
common/utils/worker.c \

View File

@@ -22,6 +22,7 @@
#include <nvgpu/soc.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/errata.h>
#include <nvgpu/runlist.h>
#include <nvgpu/types.h>
#include <nvgpu/channel.h>
@@ -54,8 +55,11 @@ int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
nvgpu_mutex_acquire(&tsg->runlist->runlist_lock);
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
RUNLIST_DISABLED);
}
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
@@ -77,8 +81,10 @@ int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
}
}
#endif
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
RUNLIST_ENABLED);
}
nvgpu_mutex_release(&tsg->runlist->runlist_lock);

View File

@@ -30,6 +30,7 @@
#include <nvgpu/semaphore.h>
#include <nvgpu/pramin.h>
#include <nvgpu/enabled.h>
#include <nvgpu/errata.h>
#include <nvgpu/ce_app.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/engines.h>
@@ -177,7 +178,7 @@ static void nvgpu_remove_mm_support(struct mm_gk20a *mm)
#ifdef CONFIG_NVGPU_DGPU
nvgpu_vidmem_destroy(g);
if (g->ops.ramin.deinit_pdb_cache_war != NULL) {
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_INIT_PDB_CACHE)) {
g->ops.ramin.deinit_pdb_cache_war(g);
}
#endif
@@ -197,7 +198,7 @@ static int nvgpu_init_system_vm(struct mm_gk20a *mm)
* For some reason the maxwell PMU code is dependent on the large page
* size. No reason AFAICT for this. Probably a bug somewhere.
*/
if (nvgpu_is_enabled(g, NVGPU_MM_FORCE_128K_PMU_VM)) {
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_MM_FORCE_128K_PMU_VM)) {
big_page_size = nvgpu_safe_cast_u64_to_u32(SZ_128K);
}
@@ -587,14 +588,14 @@ static int nvgpu_init_mm_pdb_cache_war(struct gk20a *g)
{
int err;
if (g->ops.ramin.init_pdb_cache_war != NULL) {
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_INIT_PDB_CACHE)) {
err = g->ops.ramin.init_pdb_cache_war(g);
if (err != 0) {
return err;
}
}
if (g->ops.fb.apply_pdb_cache_war != NULL) {
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_FB_PDB_CACHE)) {
err = g->ops.fb.apply_pdb_cache_war(g);
if (err != 0) {
return err;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,6 +27,7 @@
#include <nvgpu/device.h>
#include <nvgpu/nvlink_bios.h>
#include <nvgpu/device.h>
#include <nvgpu/errata.h>
#ifdef CONFIG_NVGPU_NVLINK
@@ -104,7 +105,7 @@ static int nvgpu_nvlink_enable_links_post_top(struct gk20a *g,
for_each_set_bit(bit, &enabled_links, NVLINK_MAX_LINKS_SW) {
link_id = (u32)bit;
if (g->ops.nvlink.set_sw_war != NULL) {
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_1888034)) {
g->ops.nvlink.set_sw_war(g, link_id);
}
g->ops.nvlink.intr.init_link_err_intr(g, link_id);
@@ -264,7 +265,10 @@ int nvgpu_nvlink_early_init(struct gk20a *g)
* on the GPU. This is temporary WAR while we get the VBIOS updated with
* correct mask.
*/
g->ops.nvlink.get_connected_link_mask(&(g->nvlink.connected_links));
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_VBIOS_NVLINK_MASK)) {
g->ops.nvlink.get_connected_link_mask(
&(g->nvlink.connected_links));
}
nvgpu_log(g, gpu_dbg_nvlink, "connected_links = 0x%08x",
g->nvlink.connected_links);

View File

@@ -1,7 +1,7 @@
/*
* GK20A Channel Synchronization Abstraction
*
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -28,6 +28,7 @@
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/errata.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/list.h>
@@ -378,8 +379,12 @@ nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c)
* Once nvhost update the return value as NVGPU_INVALID_SYNCPT_ID,
* we can remove the zero check.
*/
if ((sp->id == 0U) ||
(sp->id == NVGPU_INVALID_SYNCPT_ID)) {
if ((nvgpu_is_errata_present(c->g, NVGPU_ERRATA_SYNCPT_INVALID_ID_0)) &&
(sp->id == 0U)) {
nvgpu_err(c->g, "failed to get free syncpt");
goto err_free;
}
if (sp->id == NVGPU_INVALID_SYNCPT_ID) {
nvgpu_err(c->g, "failed to get free syncpt");
goto err_free;
}

View File

@@ -27,6 +27,7 @@
#include <nvgpu/channel.h>
#include <nvgpu/channel_user_syncpt.h>
#include <nvgpu/string.h>
#include <nvgpu/errata.h>
#include "channel_user_syncpt_priv.h"
static int user_sync_build_debug_name(struct nvgpu_channel *ch,
@@ -93,8 +94,12 @@ nvgpu_channel_user_syncpt_create(struct nvgpu_channel *ch)
* Once nvhost update the return value as NVGPU_INVALID_SYNCPT_ID,
* we can remove the zero check.
*/
if ((s->syncpt_id == 0U) ||
(s->syncpt_id == NVGPU_INVALID_SYNCPT_ID)) {
if ((nvgpu_is_errata_present(g, NVGPU_ERRATA_SYNCPT_INVALID_ID_0)) &&
(s->syncpt_id == 0U)) {
nvgpu_err(g, "failed to get free syncpt");
goto err_free;
}
if (s->syncpt_id == NVGPU_INVALID_SYNCPT_ID) {
nvgpu_err(g, "failed to get free syncpt");
goto err_free;
}

View File

@@ -0,0 +1,125 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/errata.h>
#include <nvgpu/bitops.h>
#include <nvgpu/log.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/utils.h>
/**
 * Array of flag names, indexed by errata flag value.
 *
 * DEFINE_ERRATA is defined only for the duration of this initializer so that
 * expanding ERRATA_FLAGS_NEXT and ERRATA_FLAGS yields one designated
 * initializer per flag. Each entry is nvgpu_stringify(flag), presumably the
 * flag identifier as a string (the macro is defined outside this file).
 *
 * The array holds NVGPU_MAX_ERRATA_BITS + 1 entries because the flag list
 * also contains an entry for NVGPU_MAX_ERRATA_BITS itself.
 */
#define DEFINE_ERRATA(flag, chip, desc) [flag] = nvgpu_stringify(flag)
static const char *errata_flag_names[NVGPU_MAX_ERRATA_BITS + 1U] = {
ERRATA_FLAGS_NEXT
ERRATA_FLAGS
};
#undef DEFINE_ERRATA
/**
 * Array of flag descriptions, indexed by errata flag value.
 *
 * DEFINE_ERRATA is redefined here to keep only the \a desc argument of each
 * entry in ERRATA_FLAGS_NEXT / ERRATA_FLAGS, so every slot holds the short
 * description string given where the flag is declared.
 *
 * Sized NVGPU_MAX_ERRATA_BITS + 1 because the flag list also contains an
 * entry for NVGPU_MAX_ERRATA_BITS itself.
 */
#define DEFINE_ERRATA(flag, chip, desc) [flag] = desc
static const char *errata_flag_desc[NVGPU_MAX_ERRATA_BITS + 1U] = {
ERRATA_FLAGS_NEXT
ERRATA_FLAGS
};
#undef DEFINE_ERRATA
/**
 * Array of chips where each errata was first discovered, indexed by errata
 * flag value.
 *
 * DEFINE_ERRATA is redefined here to keep only the \a chip argument of each
 * entry in ERRATA_FLAGS_NEXT / ERRATA_FLAGS.
 *
 * Sized NVGPU_MAX_ERRATA_BITS + 1 because the flag list also contains an
 * entry for NVGPU_MAX_ERRATA_BITS itself.
 */
#define DEFINE_ERRATA(flag, chip, desc) [flag] = chip
static const char *errata_flag_chip[NVGPU_MAX_ERRATA_BITS + 1U] = {
ERRATA_FLAGS_NEXT
ERRATA_FLAGS
};
#undef DEFINE_ERRATA
/*
 * Log a table of the errata flags that are currently set for this GPU.
 *
 * A title line, a column header and an underline are logged first; after
 * that one row (flag name, chip, description) is logged for every flag
 * below NVGPU_MAX_ERRATA_BITS that nvgpu_is_errata_present() reports as set.
 * All output goes through nvgpu_log() with the gpu_dbg_info mask.
 */
void nvgpu_print_errata_flags(struct gk20a *g)
{
	u32 flag = 0U;

	/* Title, column header and header underline. */
	nvgpu_log(g, gpu_dbg_info, "NVGPU Erratas present in chip");
	nvgpu_log(g, gpu_dbg_info, "%-55.55s %-5.5s %s",
			"Flag", "Chip", "Description");
	nvgpu_log(g, gpu_dbg_info, "%-55.55s %-5.5s %s",
			"----", "-----", "-----------");

	/* Walk every valid flag index; flags that are not set produce no row. */
	while (flag < U32(NVGPU_MAX_ERRATA_BITS)) {
		if (nvgpu_is_errata_present(g, flag)) {
			nvgpu_log(g, gpu_dbg_info, "%-55.55s %-5.5s %s",
					errata_flag_names[flag],
					errata_flag_chip[flag],
					errata_flag_desc[flag]);
		}
		flag++;
	}
}
/*
 * Allocate the errata bitmap for this GPU.
 *
 * The bitmap is zero-allocated, so no errata is marked present until
 * nvgpu_set_errata() is called (done during hal init).
 *
 * Returns 0 on success, -ENOMEM if the allocation fails.
 */
int nvgpu_init_errata_flags(struct gk20a *g)
{
	/* Enough unsigned longs to hold NVGPU_MAX_ERRATA_BITS bits. */
	size_t bitmap_bytes = BITS_TO_LONGS(U32(NVGPU_MAX_ERRATA_BITS)) *
				sizeof(unsigned long);

	g->errata_flags = nvgpu_kzalloc(g, bitmap_bytes);

	return (g->errata_flags != NULL) ? 0 : -ENOMEM;
}
/*
 * Release the errata bitmap allocated by nvgpu_init_errata_flags().
 *
 * Call this on driver shutdown!
 *
 * The pointer is reset to NULL after the free so that g->errata_flags never
 * refers to released memory; a stale pointer here would turn any later
 * errata query or a repeated teardown into a use-after-free / double free.
 */
void nvgpu_free_errata_flags(struct gk20a *g)
{
	nvgpu_kfree(g, g->errata_flags);
	g->errata_flags = NULL;
}
/*
 * Report whether the errata identified by \a flag is set for this GPU.
 *
 * Flag values at or above NVGPU_MAX_ERRATA_BITS are not valid bitmap
 * indices and are always reported as absent.
 */
bool nvgpu_is_errata_present(struct gk20a *g, u32 flag)
{
	/* Reject out-of-range indices before touching the bitmap. */
	if (flag >= NVGPU_MAX_ERRATA_BITS) {
		return false;
	}

	return nvgpu_test_bit(flag, g->errata_flags);
}
/*
 * Mark the errata identified by \a flag as present (state == true) or
 * absent (state == false) in the GPU's errata bitmap.
 *
 * Flag values at or above NVGPU_MAX_ERRATA_BITS are silently ignored.
 */
void nvgpu_set_errata(struct gk20a *g, u32 flag, bool state)
{
	if (flag < NVGPU_MAX_ERRATA_BITS) {
		if (!state) {
			nvgpu_clear_bit(flag, g->errata_flags);
		} else {
			nvgpu_set_bit(flag, g->errata_flags);
		}
	}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,6 +23,7 @@
#include <nvgpu/gk20a.h>
#include <nvgpu/io.h>
#include <nvgpu/class.h>
#include <nvgpu/errata.h>
#include <nvgpu/channel.h>
#include <nvgpu/static_analysis.h>
@@ -173,11 +174,14 @@ int gp10b_gr_intr_handle_sm_exception(struct gk20a *g,
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
"Single bit error detected in SM LRF!");
if (nvgpu_is_errata_present(g,
NVGPU_ERRATA_LRF_ECC_OVERCOUNT)) {
gr_gp10b_sm_lrf_ecc_overcount_war(true,
lrf_ecc_sed_status,
lrf_ecc_ded_status,
&lrf_single_count_delta,
lrf_double_count_delta);
}
g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter =
nvgpu_safe_add_u32(
g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter,
@@ -187,11 +191,14 @@ int gp10b_gr_intr_handle_sm_exception(struct gk20a *g,
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
"Double bit error detected in SM LRF!");
if (nvgpu_is_errata_present(g,
NVGPU_ERRATA_LRF_ECC_OVERCOUNT)) {
gr_gp10b_sm_lrf_ecc_overcount_war(false,
lrf_ecc_sed_status,
lrf_ecc_ded_status,
&lrf_double_count_delta,
lrf_single_count_delta);
}
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter =
nvgpu_safe_add_u32(
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter,

View File

@@ -22,6 +22,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/ptimer.h>
#include <nvgpu/errata.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/debugger.h>
@@ -1168,6 +1169,10 @@ int gm20b_init_hal(struct gk20a *g)
gops->get_litter_value = gm20b_get_litter_value;
gops->semaphore_wakeup = nvgpu_channel_semaphore_wakeup;
nvgpu_set_errata(g, NVGPU_ERRATA_1547668, true);
nvgpu_set_errata(g, NVGPU_ERRATA_MM_FORCE_128K_PMU_VM, true);
nvgpu_set_errata(g, NVGPU_ERRATA_SYNCPT_INVALID_ID_0, true);
nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
#ifdef CONFIG_NVGPU_FECS_TRACE
nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);

View File

@@ -22,6 +22,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/ptimer.h>
#include <nvgpu/errata.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/debugger.h>
@@ -1253,6 +1254,10 @@ int gp10b_init_hal(struct gk20a *g)
gops->get_litter_value = gp10b_get_litter_value;
gops->semaphore_wakeup = nvgpu_channel_semaphore_wakeup;
nvgpu_set_errata(g, NVGPU_ERRATA_LRF_ECC_OVERCOUNT, true);
nvgpu_set_errata(g, NVGPU_ERRATA_200391931, true);
nvgpu_set_errata(g, NVGPU_ERRATA_SYNCPT_INVALID_ID_0, true);
nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
#ifdef CONFIG_NVGPU_FECS_TRACE
nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);

View File

@@ -22,6 +22,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/errata.h>
#include <nvgpu/acr.h>
#include <nvgpu/ce.h>
#include <nvgpu/ce_app.h>
@@ -1519,6 +1520,10 @@ int gv11b_init_hal(struct gk20a *g)
gops->get_litter_value = gv11b_get_litter_value;
gops->semaphore_wakeup = nvgpu_channel_semaphore_wakeup;
nvgpu_set_errata(g, NVGPU_ERRATA_2016608, true);
nvgpu_set_errata(g, NVGPU_ERRATA_200391931, true);
nvgpu_set_errata(g, NVGPU_ERRATA_SYNCPT_INVALID_ID_0, true);
nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, false);
/* Read fuses to check if gpu needs to boot in secure/non-secure mode */

View File

@@ -22,6 +22,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/preempt.h>
#include <nvgpu/errata.h>
#include "hal/mm/mm_gm20b.h"
#include "hal/mm/mm_gp10b.h"
@@ -1723,6 +1724,12 @@ int tu104_init_hal(struct gk20a *g)
gops->get_litter_value = tu104_get_litter_value;
gops->semaphore_wakeup = nvgpu_channel_semaphore_wakeup;
nvgpu_set_errata(g, NVGPU_ERRATA_INIT_PDB_CACHE, true);
nvgpu_set_errata(g, NVGPU_ERRATA_FB_PDB_CACHE, true);
nvgpu_set_errata(g, NVGPU_ERRATA_VBIOS_NVLINK_MASK, true);
nvgpu_set_errata(g, NVGPU_ERRATA_200391931, true);
nvgpu_set_errata(g, NVGPU_ERRATA_SYNCPT_INVALID_ID_0, true);
nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, true);

View File

@@ -92,6 +92,7 @@
#include "common/clk_arb/clk_arb_gp10b.h"
#include <nvgpu/gk20a.h>
#include <nvgpu/errata.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_intr.h>
#include <nvgpu/vgpu/vgpu.h>
@@ -1063,6 +1064,10 @@ int vgpu_gv11b_init_hal(struct gk20a *g)
gops->top = vgpu_gv11b_ops_top;
gops->grmgr = vgpu_gv11b_ops_grmgr;
nvgpu_set_errata(g, NVGPU_ERRATA_2016608, true);
nvgpu_set_errata(g, NVGPU_ERRATA_200391931, true);
nvgpu_set_errata(g, NVGPU_ERRATA_SYNCPT_INVALID_ID_0, true);
#ifdef CONFIG_NVGPU_FECS_TRACE
nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
#endif

View File

@@ -86,7 +86,6 @@ struct gk20a;
"Use coherent aperture for sysmem"), \
DEFINE_FLAG(NVGPU_MM_USE_PHYSICAL_SG, \
"Use physical scatter tables instead of IOMMU"), \
DEFINE_FLAG(NVGPU_MM_FORCE_128K_PMU_VM, "WAR for gm20b chips"), \
DEFINE_FLAG(NVGPU_MM_BYPASSES_IOMMU, \
"Some chips (using nvlink) bypass the IOMMU on tegra"), \
/* Host Flags */ \

View File

@@ -0,0 +1,137 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_ERRATA_H
#define NVGPU_ERRATA_H
struct gk20a;
#include <nvgpu/types.h>
/**
* @defgroup errata
* @ingroup unit-common-utils
* @{
*/
/** @cond DOXYGEN_SHOULD_SKIP_THIS */
#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
#include "include/nvgpu/nvgpu_next_errata.h"
#else
#define ERRATA_FLAGS_NEXT
#endif
/** @endcond DOXYGEN_SHOULD_SKIP_THIS */
/*
 * Available flags, each describing one errata together with the chip where
 * the issue was first discovered and a short description. Each flag here is
 * defined by its offset in a bitmap.
 *
 * Every entry is written as DEFINE_ERRATA(flag, chip, desc); users of this
 * list define DEFINE_ERRATA to pick the fields they need before expanding
 * it. The final NVGPU_MAX_ERRATA_BITS entry is not an errata: it marks the
 * number of flags and must stay last.
 */
#define ERRATA_FLAGS \
/* GM20B */ \
DEFINE_ERRATA(NVGPU_ERRATA_MM_FORCE_128K_PMU_VM, "GM20B", "MM"),\
DEFINE_ERRATA(NVGPU_ERRATA_1547668, "GM20B", "CG"), \
/* GP10B */ \
DEFINE_ERRATA(NVGPU_ERRATA_LRF_ECC_OVERCOUNT, "GP10B", "GR ECC"), \
DEFINE_ERRATA(NVGPU_ERRATA_200391931, "GP10B", "GR Perf"), \
/* GV11B */ \
DEFINE_ERRATA(NVGPU_ERRATA_2016608, "GV11B", "FIFO Runlist preempt"), \
/* GV100 */ \
DEFINE_ERRATA(NVGPU_ERRATA_1888034, "GV100", "Nvlink"), \
/* TU104 */ \
DEFINE_ERRATA(NVGPU_ERRATA_INIT_PDB_CACHE, "TU104", "MM PDB"), \
DEFINE_ERRATA(NVGPU_ERRATA_FB_PDB_CACHE, "TU104", "FB PDB"), \
DEFINE_ERRATA(NVGPU_ERRATA_VBIOS_NVLINK_MASK, "TU104", "Nvlink VBIOS"),\
/* NvGPU Driver */ \
DEFINE_ERRATA(NVGPU_ERRATA_SYNCPT_INVALID_ID_0, "SW", "Syncpt ID"),\
DEFINE_ERRATA(NVGPU_MAX_ERRATA_BITS, "NA", "Marks max number of flags"),
/**
 * Enumeration of all errata flags.
 *
 * DEFINE_ERRATA is defined to keep only the flag identifier, so expanding
 * ERRATA_FLAGS_NEXT followed by ERRATA_FLAGS assigns consecutive values to
 * the flags in list order. These values are the bit offsets used with the
 * errata bitmap; NVGPU_MAX_ERRATA_BITS, the last entry, equals the number
 * of flags.
 */
#define DEFINE_ERRATA(flag, chip, desc) flag
enum enum_errata_flags {
ERRATA_FLAGS_NEXT
ERRATA_FLAGS
};
#undef DEFINE_ERRATA
/**
 * @brief Check if the errata identified by the passed flag is present.
 *
 * @param g [in] The GPU.
 * @param flag [in] Which errata flag to check.
 *
 * A \a flag value greater than or equal to NVGPU_MAX_ERRATA_BITS is treated
 * as absent.
 *
 * @return Boolean value to indicate the status of the bit.
 *
 * @retval TRUE if given errata is present.
 * @retval FALSE if given errata is absent or \a flag is out of range.
 */
bool nvgpu_is_errata_present(struct gk20a *g, u32 flag);
/**
 * @brief Initialize and allocate memory for errata flags.
 *
 * @param g [in] The GPU pointer.
 *
 * The flags are allocated zeroed, i.e. no errata is marked present until
 * #nvgpu_set_errata() is called.
 *
 * @return 0 for success, < 0 for error.
 *
 * @retval -ENOMEM if fails to allocate the necessary memory.
 */
int nvgpu_init_errata_flags(struct gk20a *g);
/**
 * @brief Free errata flags memory. Called during driver exit.
 *
 * @param g [in] The GPU pointer.
 *
 * Releases the memory allocated by #nvgpu_init_errata_flags().
 */
void nvgpu_free_errata_flags(struct gk20a *g);
/**
 * @brief Print the errata flags that are present in the chip.
 *
 * @param g [in] The GPU pointer.
 *
 * For each errata flag that is set, logs the flag name, the chip where the
 * errata was first discovered and a short description.
 */
void nvgpu_print_errata_flags(struct gk20a *g);
/**
 * @brief Set state of an errata flag.
 *
 * @param g [in] The GPU.
 * @param flag [in] Flag index.
 * @param state [in] The state to set the \a flag to.
 *
 * Set state of the given \a flag index to \a state. A \a flag value greater
 * than or equal to NVGPU_MAX_ERRATA_BITS is ignored.
 *
 * This is generally a somewhat low level operation with lots of potential
 * side effects. Be wary about where and when you use this. Typically a bunch
 * of calls to this early in the driver boot sequence makes sense (as
 * information is determined about the GPU at run time). Calling this in steady
 * state operation is probably an incorrect thing to do.
 */
void nvgpu_set_errata(struct gk20a *g, u32 flag, bool state);
/**
* @}
*/
#endif /* NVGPU_ERRATA_H */

View File

@@ -357,6 +357,11 @@ struct gk20a {
*/
struct nvgpu_nvhost_dev *nvhost;
/**
* Used by <nvgpu/errata.h>. Do not access directly!
*/
unsigned long *errata_flags;
/**
* Used by <nvgpu/enabled.h>. Do not access directly!
*/

View File

@@ -15,6 +15,7 @@
*/
#include <linux/reboot.h>
#include <nvgpu/errata.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/slab.h>
@@ -256,7 +257,7 @@ static void nvgpu_init_mm_vars(struct gk20a *g)
platform->unified_memory);
nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
platform->unify_address_spaces);
nvgpu_set_enabled(g, NVGPU_MM_FORCE_128K_PMU_VM,
nvgpu_set_errata(g, NVGPU_ERRATA_MM_FORCE_128K_PMU_VM,
platform->force_128K_pmu_vm);
nvgpu_mutex_init(&g->mm.tlb_lock);

View File

@@ -47,6 +47,7 @@
#include <nvgpu/soc.h>
#include <nvgpu/fbp.h>
#include <nvgpu/enabled.h>
#include <nvgpu/errata.h>
#include <nvgpu/debug.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/sim.h>
@@ -1009,6 +1010,7 @@ void gk20a_remove_support(struct gk20a *g)
nvgpu_remove_usermode_support(g);
nvgpu_free_enabled_flags(g);
nvgpu_free_errata_flags(g);
gk20a_lockout_registers(g);
}
@@ -1616,9 +1618,13 @@ static int gk20a_probe(struct platform_device *dev)
nvgpu_kmem_init(gk20a);
err = nvgpu_init_errata_flags(gk20a);
if (err)
goto return_err_platform;
err = nvgpu_init_enabled_flags(gk20a);
if (err)
goto return_err;
goto return_err_errata;
np = nvgpu_get_node(gk20a);
if (of_dma_is_coherent(np)) {
@@ -1730,6 +1736,9 @@ static int gk20a_probe(struct platform_device *dev)
return_err:
nvgpu_free_enabled_flags(gk20a);
return_err_errata:
nvgpu_free_errata_flags(gk20a);
return_err_platform:
/*
* Last since the above allocs may use data structures in here.

View File

@@ -25,6 +25,7 @@
#include <nvgpu/kmem.h>
#include <nvgpu/mc.h>
#include <nvgpu/enabled.h>
#include <nvgpu/errata.h>
#include <nvgpu/nvlink_probe.h>
#include <nvgpu/soc.h>
#include <nvgpu/sim.h>
@@ -542,10 +543,15 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, platform);
err = nvgpu_init_enabled_flags(g);
err = nvgpu_init_errata_flags(g);
if (err)
goto err_free_platform;
err = nvgpu_init_enabled_flags(g);
if (err) {
goto err_free_errata;
}
platform->g = g;
l->dev = &pdev->dev;
@@ -690,6 +696,9 @@ err_disable_msi:
if (g->msi_enabled)
pci_disable_msi(pdev);
#endif
nvgpu_free_enabled_flags(g);
err_free_errata:
nvgpu_free_errata_flags(g);
err_free_platform:
nvgpu_kfree(g, platform);
err_free_l:

View File

@@ -61,6 +61,7 @@
#include <nvgpu/pmu/pmu_perfmon.h>
#include <nvgpu/linux/dma.h>
#include <nvgpu/soc.h>
#include <nvgpu/errata.h>
#include "hal/clk/clk_gm20b.h"
@@ -864,7 +865,7 @@ static int gk20a_tegra_probe(struct device *dev)
}
platform->g->clk.gpc_pll.id = GK20A_GPC_PLL;
if (platform->platform_chip_id == TEGRA_210) {
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_1547668)) {
/* WAR for bug 1547668: Disable railgating and scaling
irrespective of platform data if the rework was not made. */
np = of_find_node_by_path("/gpu-dvfs-rework");

View File

@@ -19,6 +19,7 @@
#include <linux/fb.h>
#include <linux/version.h>
#include <nvgpu/errata.h>
#include <nvgpu/kmem.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/ptimer.h>
@@ -492,6 +493,10 @@ static ssize_t ldiv_slowdown_factor_store(struct device *dev,
unsigned long val = 0;
int err;
if (!nvgpu_is_errata_present(g, NVGPU_ERRATA_200391931)) {
return 0;
}
if (kstrtoul(buf, 10, &val) < 0) {
nvgpu_err(g, "parse error for input SLOWDOWN factor\n");
return -EINVAL;

View File

@@ -34,6 +34,7 @@
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/errata.h>
#include <nvgpu/debug.h>
#include <nvgpu/soc.h>
#include <nvgpu/defaults.h>
@@ -341,8 +342,15 @@ int vgpu_probe(struct platform_device *pdev)
nvgpu_kmem_init(gk20a);
err = nvgpu_init_errata_flags(gk20a);
if (err) {
kfree(gk20a);
return err;
}
err = nvgpu_init_enabled_flags(gk20a);
if (err) {
nvgpu_free_errata_flags(gk20a);
kfree(gk20a);
return err;
}

View File

@@ -33,6 +33,7 @@
#include <nvgpu/os_sched.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/enabled.h>
#include <nvgpu/errata.h>
#include <nvgpu/posix/probe.h>
#include <nvgpu/posix/mock-regs.h>
@@ -267,6 +268,10 @@ struct gk20a *nvgpu_posix_probe(void)
goto fail_kmem;
}
if (nvgpu_init_errata_flags(g) != 0) {
goto fail_errata_flags;
}
if (nvgpu_init_enabled_flags(g) != 0) {
goto fail_enabled_flags;
}
@@ -297,6 +302,8 @@ struct gk20a *nvgpu_posix_probe(void)
return g;
fail_enabled_flags:
nvgpu_free_errata_flags(g);
fail_errata_flags:
nvgpu_kmem_fini(g, 0);
fail_kmem:
free(p);

View File

@@ -500,6 +500,7 @@ nvgpu_gr_suspend
nvgpu_gr_sw_ready
nvgpu_has_syncpoints
nvgpu_init_enabled_flags
nvgpu_init_errata_flags
nvgpu_init_hal
nvgpu_init_ltc_support
nvgpu_init_mm_support
@@ -510,6 +511,7 @@ nvgpu_iommuable
nvgpu_free_inst_block
nvgpu_inst_block_ptr
nvgpu_is_enabled
nvgpu_is_errata_present
nvgpu_kcalloc_impl
nvgpu_kfree_impl
nvgpu_kmalloc_impl
@@ -703,6 +705,7 @@ nvgpu_tsg_unbind_channel_check_hw_state
nvgpu_tsg_unbind_channel_check_ctx_reload
nvgpu_set_bit
nvgpu_set_enabled
nvgpu_set_errata
nvgpu_set_power_state
nvgpu_set_pte
nvgpu_sgt_alignment

View File

@@ -515,6 +515,7 @@ nvgpu_gr_suspend
nvgpu_gr_sw_ready
nvgpu_has_syncpoints
nvgpu_init_enabled_flags
nvgpu_init_errata_flags
nvgpu_init_fb_support
nvgpu_init_hal
nvgpu_init_ltc_support
@@ -526,6 +527,7 @@ nvgpu_iommuable
nvgpu_free_inst_block
nvgpu_inst_block_ptr
nvgpu_is_enabled
nvgpu_is_errata_present
nvgpu_kcalloc_impl
nvgpu_kfree_impl
nvgpu_kmalloc_impl
@@ -719,6 +721,7 @@ nvgpu_tsg_unbind_channel_check_hw_state
nvgpu_tsg_unbind_channel_check_ctx_reload
nvgpu_set_bit
nvgpu_set_enabled
nvgpu_set_errata
nvgpu_set_power_state
nvgpu_set_pte
nvgpu_sgt_alignment

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,6 +24,7 @@
#include <unit/io.h>
#include <unit/unit.h>
#include <unit/core.h>
#include <nvgpu/errata.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/posix/io.h>
@@ -125,7 +126,7 @@ static void init_platform(struct unit_module *m, struct gk20a *g, bool is_iGPU)
/* Enable extra features to increase line coverage */
nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true);
nvgpu_set_enabled(g, NVGPU_MM_FORCE_128K_PMU_VM, true);
nvgpu_set_errata(g, NVGPU_ERRATA_MM_FORCE_128K_PMU_VM, true);
}
/*
@@ -289,7 +290,7 @@ int test_nvgpu_init_mm(struct unit_module *m, struct gk20a *g, void *args)
*/
nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, false);
nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, false);
nvgpu_set_enabled(g, NVGPU_MM_FORCE_128K_PMU_VM, false);
nvgpu_set_errata(g, NVGPU_ERRATA_MM_FORCE_128K_PMU_VM, false);
g->has_cde = false;
errors += nvgpu_init_mm_support_inject_error(m, g, ERROR_TYPE_HAL, 1,
@@ -297,7 +298,7 @@ int test_nvgpu_init_mm(struct unit_module *m, struct gk20a *g, void *args)
nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true);
nvgpu_set_enabled(g, NVGPU_MM_FORCE_128K_PMU_VM, true);
nvgpu_set_errata(g, NVGPU_ERRATA_MM_FORCE_128K_PMU_VM, true);
g->has_cde = true;
/*