gpu: nvgpu: fix tex rd coalesce disable logic

NETLIST_REGIONID_SW_CTX_LOAD writes reset gr_gpcs_tpcs_tex_m_dbg2_r to its
default value, which keeps rd coalescing enabled for LG & SU.

Disable rd coalesce for tex, lg and su after NETLIST_REGIONID_SW_CTX_LOAD
writes during gr init and golden ctx init for it to take effect.

For gr sw method handling, don't update the tex rd coalesce on interrupt
with offset *_SET_RD_COALESCE as we want to keep rd coalescing disabled.

Bug 3881919

Change-Id: Ie7e6616d48f84547ce3380bfa395910b7995c05b
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2857141
(cherry picked from commit b2c8827c65)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2859538
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Ramalingam C <ramalingamc@nvidia.com>
Reviewed-by: Ankur Kishore <ankkishore@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
Sagar Kamble
2023-02-27 15:44:51 +05:30
committed by mobile promotions
parent 3af0fe510e
commit 2acfb55780
8 changed files with 44 additions and 20 deletions

View File

@@ -22,6 +22,7 @@
#include <nvgpu/class.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/errata.h>
#include <nvgpu/log.h>
#include <nvgpu/io.h>
#include <nvgpu/mm.h>
@@ -657,6 +658,11 @@ static int nvgpu_gr_obj_ctx_init_hw_state(struct gk20a *g,
}
nvgpu_log_info(g, "end: netlist: sw_ctx_load: register writes");
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_200314091) &&
(g->ops.gr.init.disable_rd_coalesce != NULL)) {
g->ops.gr.init.disable_rd_coalesce(g);
}
nvgpu_log_info(g, "configure sm_hww_esr_report mask after sw_ctx_load");
g->ops.gr.intr.set_hww_esr_report_mask(g);

View File

@@ -1,7 +1,7 @@
/*
* GM20B GPC MMU
*
* Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -375,6 +375,8 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x",
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TEX_M_DBG2: 0x%x",
gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r()));
return 0;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -41,6 +41,7 @@ struct nvgpu_gr_config;
void gm20b_gr_init_lg_coalesce(struct gk20a *g, u32 data);
void gm20b_gr_init_su_coalesce(struct gk20a *g, u32 data);
void gm20a_gr_disable_rd_coalesce(struct gk20a *g);
void gm20b_gr_init_pes_vsc_stream(struct gk20a *g);
void gm20b_gr_init_fifo_access(struct gk20a *g, bool enable);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -47,27 +47,36 @@
/*
 * SW-method handler for *_SET_LG_COALESCE.
 *
 * Per the rd-coalesce errata fix, LG rd coalescing must stay disabled,
 * so this handler deliberately does NOT touch gr_gpcs_tpcs_tex_m_dbg2_r;
 * it only logs that the request was ignored.
 *
 * @g:    GPU device pointer.
 * @data: coalesce enable value requested by the SW method (ignored).
 */
void gm20b_gr_init_lg_coalesce(struct gk20a *g, u32 data)
{
	/* Intentionally unused: the requested value is never applied. */
	(void) data;

	nvgpu_log_fn(g, " ");

	nvgpu_log_info(g, "Not updating rd coalesce");
}
/*
 * SW-method handler for *_SET_SU_COALESCE.
 *
 * Per the rd-coalesce errata fix, SU rd coalescing must stay disabled,
 * so this handler deliberately does NOT touch gr_gpcs_tpcs_tex_m_dbg2_r;
 * it only logs that the request was ignored.
 *
 * @g:    GPU device pointer.
 * @data: coalesce enable value requested by the SW method (ignored).
 */
void gm20b_gr_init_su_coalesce(struct gk20a *g, u32 data)
{
	/* Intentionally unused: the requested value is never applied. */
	(void) data;

	nvgpu_log_info(g, "Not updating rd coalesce");
}
/*
 * Force rd coalescing off for all three clients (SU, LG and TEX) by
 * clearing their enable bits in gr_gpcs_tpcs_tex_m_dbg2_r. Called after
 * NETLIST_REGIONID_SW_CTX_LOAD writes, which restore the register to its
 * default (coalesce-enabled) value.
 */
void gm20a_gr_disable_rd_coalesce(struct gk20a *g)
{
	u32 val = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r());

	/* Clear each coalesce-enable field in turn. */
	val = set_field(val,
			gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_m(),
			gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_f(0));
	val = set_field(val,
			gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(),
			gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(0));
	val = set_field(val,
			gr_gpcs_tpcs_tex_m_dbg2_tex_rd_coalesce_en_m(),
			gr_gpcs_tpcs_tex_m_dbg2_tex_rd_coalesce_en_f(0));

	gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), val);
}
void gm20b_gr_init_pes_vsc_stream(struct gk20a *g)

View File

@@ -328,6 +328,7 @@ static const struct gops_gr_init gm20b_ops_gr_init = {
.ecc_scrub_reg = NULL,
.lg_coalesce = gm20b_gr_init_lg_coalesce,
.su_coalesce = gm20b_gr_init_su_coalesce,
.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
.pes_vsc_stream = gm20b_gr_init_pes_vsc_stream,
.gpc_mmu = gm20b_gr_init_gpc_mmu,
.fifo_access = gm20b_gr_init_fifo_access,
@@ -1213,6 +1214,7 @@ int gm20b_init_hal(struct gk20a *g)
nvgpu_set_errata(g, NVGPU_ERRATA_1547668, true);
nvgpu_set_errata(g, NVGPU_ERRATA_MM_FORCE_128K_PMU_VM, true);
nvgpu_set_errata(g, NVGPU_ERRATA_SYNCPT_INVALID_ID_0, true);
nvgpu_set_errata(g, NVGPU_ERRATA_200314091, true);
nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
#ifdef CONFIG_NVGPU_FECS_TRACE

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -52,6 +52,7 @@ struct gk20a;
/* GP10B */ \
DEFINE_ERRATA(NVGPU_ERRATA_LRF_ECC_OVERCOUNT, "GP10B", "GR ECC"), \
DEFINE_ERRATA(NVGPU_ERRATA_200391931, "GP10B", "GR Perf"), \
DEFINE_ERRATA(NVGPU_ERRATA_200314091, "GM20B & GP10B", "GR RD Coalescing"), \
/* GV11B */ \
DEFINE_ERRATA(NVGPU_ERRATA_2016608, "GV11B", "FIFO Runlist preempt"), \
DEFINE_ERRATA(NVGPU_ERRATA_3524791, "GV11B", \

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -794,6 +794,7 @@ struct gops_gr_init {
struct nvgpu_gr_config *gr_config);
void (*lg_coalesce)(struct gk20a *g, u32 data);
void (*su_coalesce)(struct gk20a *g, u32 data);
void (*disable_rd_coalesce)(struct gk20a *g);
void (*pes_vsc_stream)(struct gk20a *g);
void (*gpc_mmu)(struct gk20a *g);
u32 (*get_sm_id_size)(void);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -669,6 +669,8 @@
#define gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m() (U32(0x1U) << 2U)
#define gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_f(v) ((U32(v) & 0x1U) << 4U)
#define gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_m() (U32(0x1U) << 4U)
#define gr_gpcs_tpcs_tex_m_dbg2_tex_rd_coalesce_en_f(v) ((U32(v) & 0x1U) << 5U)
#define gr_gpcs_tpcs_tex_m_dbg2_tex_rd_coalesce_en_m() (U32(0x1U) << 5U)
#define gr_gpccs_falcon_addr_r() (0x0041a0acU)
#define gr_gpccs_falcon_addr_lsb_s() (6U)
#define gr_gpccs_falcon_addr_lsb_f(v) ((U32(v) & 0x3fU) << 0U)