From 321145b37ecfcb8d81271d529ca92a696161704b Mon Sep 17 00:00:00 2001
From: Prathap Kumar Valsan
Date: Mon, 22 May 2023 08:49:19 +0000
Subject: [PATCH] gpu: nvgpu: Enable raw mode for compression

In the raw addressing mode of the CBC backing storage, comptaglines do
not need to be allocated or programmed into the PTEs.

Introduce a flag that indicates whether the hardware supports raw mode,
and use it to skip comptagline allocation and the corresponding page
table programming.

JIRA NVGPU-9717

Change-Id: I0a16881fc3e897c3c408b30d1835f30564649dad
Signed-off-by: Prathap Kumar Valsan
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2908278
Tested-by: mobile promotions
Reviewed-by: mobile promotions
---
 drivers/gpu/nvgpu/common/mm/gmmu/page_table.c | 30 +++++++------
 drivers/gpu/nvgpu/common/mm/gmmu/pte.c        | 34 ++++++++-------
 drivers/gpu/nvgpu/common/mm/vm.c              | 12 ++++++
 drivers/gpu/nvgpu/common/mm/vm_remap.c        | 11 +++--
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h       |  7 +++
 drivers/gpu/nvgpu/os/linux/comptags.c         | 12 +++++-
 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c       | 43 +++++++++++++------
 drivers/gpu/nvgpu/os/linux/vm.c               | 15 ++++---
 drivers/gpu/nvgpu/os/linux/vm_remap.c         | 28 ++++++------
 9 files changed, 125 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
index 7a3b03ef0..f748d630a 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
@@ -1186,23 +1186,25 @@ u64 nvgpu_gmmu_map_locked(struct vm_gk20a *vm,
         .aperture = aperture,
     };
 #ifdef CONFIG_NVGPU_COMPRESSION
-    u64 ctag_granularity = g->ops.fb.compression_page_size(g);
+    if (!g->cbc_use_raw_mode) {
+        u64 ctag_granularity = g->ops.fb.compression_page_size(g);

-    attrs.ctag = (u64)ctag_offset * ctag_granularity;
-    /*
-     * We need to add the buffer_offset within compression_page_size so that
-     * the programmed ctagline gets increased at compression_page_size
-     * boundaries.
-     */
-    if (attrs.ctag != 0ULL) {
-        nvgpu_assert(ctag_granularity >= 1ULL);
-        attrs.ctag = nvgpu_safe_add_u64(attrs.ctag,
-            buffer_offset & (ctag_granularity - U64(1)));
-    }
+        attrs.ctag = (u64)ctag_offset * ctag_granularity;
+        /*
+         * We need to add the buffer_offset within compression_page_size so that
+         * the programmed ctagline gets increased at compression_page_size
+         * boundaries.
+         */
+        if (attrs.ctag != 0ULL) {
+            nvgpu_assert(ctag_granularity >= 1ULL);
+            attrs.ctag = nvgpu_safe_add_u64(attrs.ctag,
+                buffer_offset & (ctag_granularity - U64(1)));
+        }

-    attrs.cbc_comptagline_mode =
-        g->ops.fb.is_comptagline_mode_enabled != NULL ?
+        attrs.cbc_comptagline_mode =
+            g->ops.fb.is_comptagline_mode_enabled != NULL ?
             g->ops.fb.is_comptagline_mode_enabled(g) : true;
+    }
 #endif

     attrs.l3_alloc = ((flags & NVGPU_VM_MAP_L3_ALLOC) != 0U);
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/pte.c b/drivers/gpu/nvgpu/common/mm/gmmu/pte.c
index 19510525b..73edb00f8 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/pte.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/pte.c
@@ -72,24 +72,26 @@ void nvgpu_pte_dbg_print(struct gk20a *g,
     const char *aperture_str = nvgpu_aperture_str(attrs->aperture);
     const char *perm_str = nvgpu_gmmu_perm_str(attrs->rw_flag);
 #ifdef CONFIG_NVGPU_COMPRESSION
-    u64 ctag_tmp = attrs->ctag;
-    u32 str_len = 0U;
-    u32 ctag_num = 0U;
+    if (!g->cbc_use_raw_mode) {
+        u64 ctag_tmp = attrs->ctag;
+        u32 str_len = 0U;
+        u32 ctag_num = 0U;

-    /*
-     * attrs->ctag is incremented to count current page size as well.
-     * Subtract to get this page's ctag line number.
-     */
-    if (ctag_tmp != 0ULL) {
-        ctag_tmp = nvgpu_safe_sub_u64(ctag_tmp, page_size);
+        /*
+         * attrs->ctag is incremented to count current page size as well.
+         * Subtract to get this page's ctag line number.
+         */
+        if (ctag_tmp != 0ULL) {
+            ctag_tmp = nvgpu_safe_sub_u64(ctag_tmp, page_size);
+        }
+
+        ctag_num = nvgpu_safe_cast_u64_to_u32(ctag_tmp /
+                g->ops.fb.compression_page_size(g));
+        (void)strcpy(ctag_str, "ctag=0x\0");
+        str_len = (u32)strlen(ctag_str);
+        (void)nvgpu_strnadd_u32(ctag_str + str_len, ctag_num,
+                nvgpu_safe_sub_u32(31U, str_len), 16U);
     }
-
-    ctag_num = nvgpu_safe_cast_u64_to_u32(ctag_tmp /
-            g->ops.fb.compression_page_size(g));
-    (void)strcpy(ctag_str, "ctag=0x\0");
-    str_len = (u32)strlen(ctag_str);
-    (void)nvgpu_strnadd_u32(ctag_str + str_len, ctag_num,
-            nvgpu_safe_sub_u32(31U, str_len), 16U);
 #endif
     (void)map_attrs_to_str(attrs_str, attrs);
     pte_dbg(g, attrs,
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 3fee0a961..594fd2ecc 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -1187,6 +1187,17 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
     (void)os_buf;
     (void)flags;
 #ifdef CONFIG_NVGPU_COMPRESSION
+    if (g->cbc_use_raw_mode) {
+        if (binfo_ptr->compr_kind != NVGPU_KIND_INVALID) {
+            pte_kind = (u8)binfo_ptr->compr_kind;
+        } else if (binfo_ptr->incompr_kind >= 0) {
+            pte_kind = (u8)binfo_ptr->incompr_kind;
+        } else {
+            err = -EINVAL;
+            goto ret_err;
+        }
+        goto kind_done;
+    }
     err = nvgpu_vm_compute_compression(vm, binfo_ptr);
     if (err != 0) {
         nvgpu_err(g, "failure setting up compression");
@@ -1305,6 +1316,7 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
         goto ret_err;
     }

+kind_done:
 #ifdef CONFIG_NVGPU_COMPRESSION
     if (clear_ctags) {
         clear_ctags = gk20a_comptags_start_clear(os_buf);
diff --git a/drivers/gpu/nvgpu/common/mm/vm_remap.c b/drivers/gpu/nvgpu/common/mm/vm_remap.c
index e08cfbf4c..93f30b8fd 100644
--- a/drivers/gpu/nvgpu/common/mm/vm_remap.c
+++ b/drivers/gpu/nvgpu/common/mm/vm_remap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -202,6 +202,7 @@ static int nvgpu_vm_remap_validate_map(struct vm_gk20a *vm,
     struct nvgpu_vm_remap_os_buffer *remap_os_buf)
 {
     u64 page_size = nvgpu_vm_remap_page_size(op);
+    struct gk20a *g = gk20a_from_vm(vm);
     u64 map_offset;
     u64 map_size;
     u64 os_buf_size;
@@ -216,9 +217,9 @@ static int nvgpu_vm_remap_validate_map(struct vm_gk20a *vm,
     }

 #ifdef CONFIG_NVGPU_COMPRESSION
-    if (op->compr_kind != NVGPU_KIND_INVALID) {
+    if (op->compr_kind != NVGPU_KIND_INVALID &&
+        !g->cbc_use_raw_mode) {
-        struct gk20a *g = gk20a_from_vm(vm);
         struct gk20a_comptags comptags = { 0 };

         /*
@@ -360,6 +361,10 @@ static u64 nvgpu_vm_remap_get_ctag_offset(struct vm_gk20a *vm,
             page_size);
     u64 compression_page_size;

+    if (g->cbc_use_raw_mode) {
+        return 0;
+    }
+
     gk20a_get_comptags(os_buf, &comptags);

     if (comptags.lines != 0) {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 51621e0d3..2fef09af6 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -752,6 +752,13 @@ struct gk20a {
     u32 max_comptag_mem; /* max memory size (MB) for comptag */
     struct nvgpu_cbc *cbc;

+    /*
+     * In raw mode, the L2 calculates the CBC backing storage address
+     * from the physical address of the compressible surface. The
+     * comptaglines are not used in the calculation, so nvgpu does not
+     * need to allocate them or program them into the page tables.
+     */
+    bool cbc_use_raw_mode;
 #endif

 #ifdef CONFIG_NVGPU_NON_FUSA
diff --git a/drivers/gpu/nvgpu/os/linux/comptags.c b/drivers/gpu/nvgpu/os/linux/comptags.c
index 6d248296e..6feb83920 100644
--- a/drivers/gpu/nvgpu/os/linux/comptags.c
+++ b/drivers/gpu/nvgpu/os/linux/comptags.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -51,6 +51,16 @@ int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
     u32 lines = 0;
     int err;

+    /*
+     * In raw mode, the hardware uses the raw address to map the
+     * compressible memory address to the CBC address; comptaglines are
+     * never used.
+ */ + if (g->cbc_use_raw_mode) { + nvgpu_err(g, "comptags should not be allocated in raw mode\n"); + return -EINVAL; + } + ctag_granularity = g->ops.fb.compression_page_size(g); lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index bee0eb4c5..91ded2fdb 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -428,10 +428,13 @@ static long gk20a_ctrl_ioctl_gpu_characteristics( #ifdef CONFIG_NVGPU_COMPRESSION if (nvgpu_is_enabled(g, NVGPU_SUPPORT_COMPRESSION)) { gpu.compression_page_size = g->ops.fb.compression_page_size(g); - gpu.gr_compbit_store_base_hw = g->cbc->compbit_store.base_hw; - gpu.gr_gobs_per_comptagline_per_slice = - g->cbc->gobs_per_comptagline_per_slice; - gpu.cbc_comptags_per_line = g->cbc->comptags_per_cacheline; + + if (!g->cbc_use_raw_mode) { + gpu.gr_compbit_store_base_hw = g->cbc->compbit_store.base_hw; + gpu.gr_gobs_per_comptagline_per_slice = + g->cbc->gobs_per_comptagline_per_slice; + gpu.cbc_comptags_per_line = g->cbc->comptags_per_cacheline; + } } #endif @@ -2256,16 +2259,28 @@ static int nvgpu_handle_comptags_control(struct gk20a *g, os_buf.dmabuf = dmabuf; os_buf.dev = dev_from_gk20a(g); - err = gk20a_alloc_comptags(g, &os_buf, &g->cbc->comp_tags); - if (err != 0) { - if (comptags_alloc_control == - NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) { - nvgpu_err(g, "Comptags allocation (required) failed (%d)", - err); - } else { - nvgpu_err(g, "Comptags allocation (requested) failed (%d)", - err); - err = 0; + /* + * In raw mode, comptaglines are not used to map the compressible memory + * address to the CBC address, instead raw address is used by the + * hardrdware to offset to the CBC address. Mark comptags as enabled and + * return. In the comptagline mode, comptags will be marked as enabled + * once the comptags are successfuly allocated in the + * gk20a_alloc_comptags(). + */ + if (g->cbc_use_raw_mode) { + priv->comptags.enabled = true; + } else { + err = gk20a_alloc_comptags(g, &os_buf, &g->cbc->comp_tags); + if (err != 0) { + if (comptags_alloc_control == + NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) { + nvgpu_err(g, "Comptags allocation (required) failed (%d)", + err); + } else { + nvgpu_err(g, "Comptags allocation (requested) failed (%d)", + err); + err = 0; + } } } diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c index 1b5773daa..ba91ebe38 100644 --- a/drivers/gpu/nvgpu/os/linux/vm.c +++ b/drivers/gpu/nvgpu/os/linux/vm.c @@ -524,7 +524,8 @@ int nvgpu_vm_mapping_modify(struct vm_gk20a *vm, * If we support compression and there's a compressible kind, use it. 
      */
 #ifdef CONFIG_NVGPU_COMPRESSION
-    if (mapped_buffer->ctag_offset != 0) {
+    if (mapped_buffer->ctag_offset != 0 ||
+        g->cbc_use_raw_mode) {
         if (compr_kind == NV_KIND_INVALID) {
             kind = incompr_kind;
         } else {
@@ -549,13 +550,15 @@ int nvgpu_vm_mapping_modify(struct vm_gk20a *vm,
     }

 #ifdef CONFIG_NVGPU_COMPRESSION
-    ctag_offset = mapped_buffer->ctag_offset;
+    if (!g->cbc_use_raw_mode) {
+        ctag_offset = mapped_buffer->ctag_offset;

-    compression_page_size = g->ops.fb.compression_page_size(g);
-    nvgpu_assert(compression_page_size > 0ULL);
+        compression_page_size = g->ops.fb.compression_page_size(g);
+        nvgpu_assert(compression_page_size > 0ULL);

-    ctag_offset += (u32)(buffer_offset >>
-        nvgpu_ilog2(compression_page_size));
+        ctag_offset += (u32)(buffer_offset >>
+            nvgpu_ilog2(compression_page_size));
+    }
 #endif

     if (g->ops.mm.gmmu.map(vm,
diff --git a/drivers/gpu/nvgpu/os/linux/vm_remap.c b/drivers/gpu/nvgpu/os/linux/vm_remap.c
index 17b607295..2f5975d84 100644
--- a/drivers/gpu/nvgpu/os/linux/vm_remap.c
+++ b/drivers/gpu/nvgpu/os/linux/vm_remap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -127,19 +127,21 @@ void nvgpu_vm_remap_os_buf_put(struct vm_gk20a *vm,
         remap_os_buf->os_priv.attachment,
         remap_os_buf->os_priv.sgt);

 #ifdef CONFIG_NVGPU_COMPRESSION
-    gk20a_get_comptags(&remap_os_buf->os_buf, &comptags);
+    if (!g->cbc_use_raw_mode) {
+        gk20a_get_comptags(&remap_os_buf->os_buf, &comptags);

-    /*
-     * Flush compression bit cache before releasing the physical
-     * memory buffer reference.
-     */
-    if (comptags.offset != 0) {
-        g->ops.cbc.ctrl(g, nvgpu_cbc_op_clean, 0, 0);
-        err = nvgpu_pg_elpg_ms_protected_call(g,
-            g->ops.mm.cache.l2_flush(g, true));
-        if (err != 0) {
-            nvgpu_err(g, "l2 flush failed");
-            return;
+        /*
+         * Flush compression bit cache before releasing the physical
+         * memory buffer reference.
+         */
+        if (comptags.offset != 0) {
+            g->ops.cbc.ctrl(g, nvgpu_cbc_op_clean, 0, 0);
+            err = nvgpu_pg_elpg_ms_protected_call(g,
+                g->ops.mm.cache.l2_flush(g, true));
+            if (err != 0) {
+                nvgpu_err(g, "l2 flush failed");
+                return;
+            }
         }
     }
 #endif
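
For readers outside the nvgpu tree, the following is a minimal standalone sketch
of the control flow this patch introduces. All names here (struct gpu,
alloc_comptaglines, map_compressible_buffer, the 64 KiB compression page size)
are hypothetical stand-ins, not the nvgpu API; the point is only that a single
cbc_use_raw_mode flag short-circuits comptagline allocation and leaves the PTE
ctag offset at zero, since in raw mode the L2 derives the CBC backing storage
address from the surface's physical address.

/* Hypothetical sketch only -- not nvgpu code. Build: cc -o raw_mode raw_mode.c */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define COMPRESSION_PAGE_SIZE (64u * 1024u) /* assumed 64 KiB granularity */

struct gpu {
    bool cbc_use_raw_mode;  /* mirrors g->cbc_use_raw_mode */
    uint32_t next_ctagline; /* toy comptagline allocator state */
};

/*
 * Comptagline mode: one comptagline per compression page of the buffer,
 * which is later programmed into the PTEs.
 */
static uint32_t alloc_comptaglines(struct gpu *g, uint64_t buf_size)
{
    uint32_t lines = (uint32_t)((buf_size + COMPRESSION_PAGE_SIZE - 1) /
            COMPRESSION_PAGE_SIZE);
    uint32_t first = g->next_ctagline;

    g->next_ctagline += lines;
    return first;
}

/*
 * Returns the ctag offset to program into the PTEs. In raw mode it is
 * always 0: the hardware computes the CBC address from the physical
 * address, so no comptagline is allocated.
 */
static uint32_t map_compressible_buffer(struct gpu *g, uint64_t buf_size)
{
    if (g->cbc_use_raw_mode) {
        return 0; /* skip allocation and PTE programming entirely */
    }
    return alloc_comptaglines(g, buf_size);
}

int main(void)
{
    struct gpu raw = { .cbc_use_raw_mode = true, .next_ctagline = 1 };
    struct gpu tag = { .cbc_use_raw_mode = false, .next_ctagline = 1 };

    printf("raw mode: ctag offset %u\n",
        map_compressible_buffer(&raw, 1u << 20));
    printf("comptagline mode: ctag offset %u\n",
        map_compressible_buffer(&tag, 1u << 20));
    return 0;
}

With a 1 MiB buffer, the comptagline path in this toy model hands out 16 lines
starting at line 1, while the raw path allocates nothing and programs nothing.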