gpu: nvgpu: Add CDE bits in FECS header

For a CDE channel, the T1 (Tex) unit's accesses need to be promoted to 128B-aligned ones; otherwise the HW deadlocks. The GPU driver sets the CDE bit in the FECS header, which FECS uses to configure the T1 promotion to aligned 128B accesses. Bug 200096226 Change-Id: I8a8deaf6fb91f4bbceacd491db7eb6f7bca5001b Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com> Reviewed-on: http://git-master/r/804625 Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
2025-12-23 09:57:08 +03:00 · 2015-09-28 15:26:23 -07:00
parent 39e8bff2fc
commit ab93322b25
7 changed files with 31 additions and 1 deletions
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -493,6 +493,9 @@ static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
 	alloc_obj_ctx.class_num = required_class;
 	alloc_obj_ctx.flags = 0;

+	/* CDE enabled */
+	cde_ctx->ch->cde = true;
+
 	err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx);
 	if (err) {
 		gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to allocate ctx. err=%d",
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -100,6 +100,7 @@ struct channel_gk20a {
 	bool bound;
 	bool first_init;
 	bool vpr;
+	bool cde;
 	pid_t pid;
 	struct mutex ioctl_lock;

--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -180,6 +180,7 @@ struct gpu_ops {
 		int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies,
 		       u32 expect_delay);
 		void (*init_cyclestats)(struct gk20a *g);
+		void (*enable_cde_in_fecs)(void *ctx_ptr);
 	} gr;
 	const char *name;
 	struct {
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1632,6 +1632,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	if (!ctx_ptr)
 		return -ENOMEM;

+	if (g->ops.gr.enable_cde_in_fecs && c->cde)
+		g->ops.gr.enable_cde_in_fecs(ctx_ptr);
+
 	for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
 		gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]);

--- a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2012-2015, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1052,6 +1052,15 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g)
 #endif
 }

+/* Set the CDE-enabled flag in the ctl word of the image at ctx_ptr. */
+static void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr)
+{
+	u32 cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0);
+
+	cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f();
+	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v);
+}
+
 void gm20b_init_gr(struct gpu_ops *gops)
 {
 	gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1107,4 +1116,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info;
 	gops->gr.wait_empty = gr_gk20a_wait_idle;
 	gops->gr.init_cyclestats = gr_gm20b_init_cyclestats;
+	gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs;
 }
--- a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h
@@ -58,6 +58,18 @@ static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
 {
 	return 0x00000008;
 }
+static inline u32 ctxsw_prog_main_image_ctl_o(void)
+{
+	return 0x0000000c; /* offset of the ctl word in the main image */
+}
+static inline u32 ctxsw_prog_main_image_ctl_cde_enabled_f(void)
+{
+	return 0x400; /* ctl field value: CDE enabled (bit 10) */
+}
+static inline u32 ctxsw_prog_main_image_ctl_cde_disabled_f(void)
+{
+	return 0x0; /* ctl field value: CDE disabled (bit 10 clear) */
+}
 static inline u32 ctxsw_prog_main_image_patch_count_o(void)
 {
 	return 0x00000010;