gpu: nvgpu: ga10b: update cbc divisor and top reg

Currently, cbc init and compression tests fail because the MMU marks the CBC as not safe. - Modify the cbc.get_base_divisor HAL to use ltc_count = 1 for Tegra devices. - Update fb.cbc_configure to write the compbit_backing_size value to the fb_mmu_cbc_top register. - After configuration, confirm that the CBC is marked safe. Bug 3353418 Change-Id: I1e9c27f47f7bfcf476f2499231951382e2e8653d Signed-off-by: Vedashree Vidwans <vvidwans@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2570550 Reviewed-by: Sami Kiminki <skiminki@nvidia.com> Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: V M S Seeta Rama Raju Mudundi <srajum@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit
2025-12-24 10:34:43 +03:00 · 2021-08-04 07:11:16 -07:00
parent 00e67e0798
commit a7a2e1e263
5 changed files with 49 additions and 10 deletions
--- a/drivers/gpu/nvgpu/hal/cbc/cbc_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/cbc/cbc_ga10b.c
@@ -33,6 +33,17 @@

 #include <nvgpu/hw/ga10b/hw_ltc_ga10b.h>

+u64 ga10b_cbc_get_base_divisor(struct gk20a *g)
+{
+	/*
+	 * Tegra addresses memory differently from dGPU: all partitions talk
+	 * to the same memory, so the divisor uses a fixed LTC count of 1.
+	 */
+	u64 ltc_count = 1ULL;
+
+	return ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
+}
+
 int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
 {
 	/*
@@ -108,8 +119,8 @@ int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
 			gobs_per_comptagline_per_slice), compstatus_per_gob);

 	/* aligned to 2KB * ltc_count */
-	compbit_backing_size +=
-		ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
+	compbit_backing_size += nvgpu_safe_cast_u64_to_u32(
+		g->ops.cbc.get_base_divisor(g));

 	/* must be a multiple of 64KB */
 	compbit_backing_size = round_up(compbit_backing_size, SZ_64K);
--- a/drivers/gpu/nvgpu/hal/cbc/cbc_ga10b.h
+++ b/drivers/gpu/nvgpu/hal/cbc/cbc_ga10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -30,6 +30,7 @@
 struct gk20a;
 struct nvgpu_cbc;

+u64 ga10b_cbc_get_base_divisor(struct gk20a *g);
 int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc);
 void ga10b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc);

--- a/drivers/gpu/nvgpu/hal/fb/fb_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/fb/fb_ga10b.c
@@ -41,28 +41,51 @@ void ga10b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc)
 	u64 base_divisor;
 	u64 compbit_store_base;
 	u64 compbit_store_pa;
-	u32 cbc_max;
+	u64 combit_top_size;
+	u32 cbc_max_rval;

-	compbit_store_pa = nvgpu_mem_get_addr(g, &cbc->compbit_store.mem);
+	/*
+	 * Update CBC registers
+	 * Note: CBC Base value should be updated after CBC MAX
+	 */
 	base_divisor = g->ops.cbc.get_base_divisor(g);
-	compbit_store_base = DIV_ROUND_UP(compbit_store_pa, base_divisor);
+	combit_top_size = cbc->compbit_backing_size;
+	combit_top_size = round_up(combit_top_size, base_divisor);
+	nvgpu_assert(combit_top_size < U64(U32_MAX));
+	nvgpu_writel(g, fb_mmu_cbc_top_r(),
+		fb_mmu_cbc_top_address_f(U32(combit_top_size)));

-	cbc_max = nvgpu_readl(g, fb_mmu_cbc_max_r());
-	cbc_max = set_field(cbc_max,
+	cbc_max_rval = nvgpu_readl(g, fb_mmu_cbc_max_r());
+	cbc_max_rval = set_field(cbc_max_rval,
 		  fb_mmu_cbc_max_comptagline_m(),
 		  fb_mmu_cbc_max_comptagline_f(cbc->max_comptag_lines));
-	nvgpu_writel(g, fb_mmu_cbc_max_r(), cbc_max);
+	nvgpu_writel(g, fb_mmu_cbc_max_r(), cbc_max_rval);
+
+	compbit_store_pa = nvgpu_mem_get_addr(g, &cbc->compbit_store.mem);
+	compbit_store_base = round_down(compbit_store_pa, base_divisor);

 	nvgpu_assert(compbit_store_base < U64(U32_MAX));
 	nvgpu_writel(g, fb_mmu_cbc_base_r(),
 		fb_mmu_cbc_base_address_f(U32(compbit_store_base)));

+	nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte,
+		"compbit top size: 0x%x,%08x \n",
+		(u32)(combit_top_size >> 32),
+		(u32)(combit_top_size & 0xffffffffU));
+
 	nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte,
 		"compbit base.pa: 0x%x,%08x cbc_base:0x%llx\n",
 		(u32)(compbit_store_pa >> 32),
 		(u32)(compbit_store_pa & 0xffffffffU),
 		compbit_store_base);

+	/* Make sure cbc is marked safe by MMU */
+	cbc_max_rval = nvgpu_readl(g, fb_mmu_cbc_max_r());
+	if ((cbc_max_rval & fb_mmu_cbc_max_safe_m()) !=
+		fb_mmu_cbc_max_safe_true_f()) {
+		nvgpu_err(g, "CBC marked unsafe by MMU, check cbc config");
+	}
+
 	cbc->compbit_store.base_hw = compbit_store_base;
 }
 #endif
--- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
@@ -365,7 +365,7 @@ static const struct gops_cbc ga10b_ops_cbc = {
 	.cbc_init_support = nvgpu_cbc_init_support,
 	.cbc_remove_support = nvgpu_cbc_remove_support,
 	.init = ga10b_cbc_init,
-	.get_base_divisor = tu104_cbc_get_base_divisor,
+	.get_base_divisor = ga10b_cbc_get_base_divisor,
 	.alloc_comptags = ga10b_cbc_alloc_comptags,
 	.ctrl = tu104_cbc_ctrl,
 };
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_fb_ga10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_fb_ga10b.h
@@ -281,8 +281,12 @@
 #define fb_mmu_cbc_max_r()                                         (0x00100eccU)
 #define fb_mmu_cbc_max_comptagline_f(v)             ((U32(v) & 0xffffffU) << 0U)
 #define fb_mmu_cbc_max_comptagline_m()                    (U32(0xffffffU) << 0U)
+#define fb_mmu_cbc_max_safe_m()                               (U32(0x1U) << 31U)
+#define fb_mmu_cbc_max_safe_true_f()                               (0x80000000U)
 #define fb_mmu_cbc_base_r()                                        (0x00100ec4U)
 #define fb_mmu_cbc_base_address_f(v)               ((U32(v) & 0x3ffffffU) << 0U)
+#define fb_mmu_cbc_top_r()                                         (0x00100ec8U)
+#define fb_mmu_cbc_top_address_f(v)                   ((U32(v) & 0x7fffU) << 0U)
 #define fb_mmu_vpr_mode_r()                                        (0x001fa800U)
 #define fb_mmu_vpr_mode_fetch_v(r)                          (((r) >> 2U) & 0x1U)
 #define fb_mmu_vpr_mode_fetch_false_v()                            (0x00000000U)