mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Cleanup usage of bypass_smmu
The GPU has multiple different operating modes with respect to IOMMU'ability, so there needs to be a clean way to tell the driver whether it is IOMMU'able or not. This state also does not always reflect what is possible: just because the GPU can generate IOMMU'ed memory requests does not mean it wants to.

The nvgpu_iommuable() API has now existed for a little while and is a useful way to convey whether nvgpu should consider the GPU as IOMMU'able. However, there is also the g->mm.bypass_smmu flag, which used to be able to override what the GPU decided it should do. Typically it was assigned the same value as nvgpu_iommuable(), but that was not necessarily a requirement.

This patch removes all usages of g->mm.bypass_smmu and uses the nvgpu_iommuable() function instead. All places that checked g->mm.bypass_smmu now call nvgpu_iommuable(). The code should now be much cleaner.

Subsequently, other checks can also be placed in the nvgpu_iommuable() function. For example, when NVLINK comes online and the GPU should no longer consider DMA addresses and should instead use scatter-gather lists directly, the nvgpu_iommuable() function will be able to check the state of NVLINK and act accordingly.

Change-Id: I0da6262386de15709decac89d63d3eecfec20cd7
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1648332
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: ac5b3d9640
Commit: 98da3f8eed
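A minimal sketch of the pattern this patch applies throughout the driver: callers ask nvgpu_iommuable() instead of reading g->mm.bypass_smmu. The helper sysmem_buffer_io_addr() below is hypothetical and not part of nvgpu; only the nvgpu_iommuable() and nvgpu_sgt_get_dma() calls it uses appear in the hunks that follow.

/*
 * Illustrative sketch only -- sysmem_buffer_io_addr() is a hypothetical
 * helper. It shows the pattern this patch applies: consult
 * nvgpu_iommuable() rather than the g->mm.bypass_smmu flag.
 */
static u64 sysmem_buffer_io_addr(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/*
	 * IOMMU'ed: the buffer's DMA address is one contiguous IO virtual
	 * address, so it can be used directly.
	 */
	if (nvgpu_iommuable(g))
		return nvgpu_sgt_get_dma(sgt, sgt->sgl);

	/*
	 * Not IOMMU'able: there is no single IO address; the caller must
	 * walk the scatter-gather list chunk by chunk instead.
	 */
	return 0ULL;
}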
@@ -26,6 +26,7 @@
 #include "cde_gp10b.h"

 #include <nvgpu/log.h>
+#include <nvgpu/dma.h>

 enum gp10b_programs {
 	GP10B_PROG_HPASS = 0,
@@ -56,10 +57,10 @@ void gp10b_cde_get_program_numbers(struct gk20a *g,
 		hprog = GP10B_PROG_HPASS_DEBUG;
 		vprog = GP10B_PROG_VPASS_DEBUG;
 	}

-	if (g->mm.bypass_smmu) {
+	if (!nvgpu_iommuable(g)) {
 		if (!g->mm.disable_bigpage) {
 			nvgpu_warn(g,
-				   "when bypass_smmu is 1, disable_bigpage must be 1 too");
+				   "When no IOMMU big pages cannot be used");
 		}
 		hprog |= 1;
 		vprog |= 1;
@@ -72,7 +73,7 @@ void gp10b_cde_get_program_numbers(struct gk20a *g,

 bool gp10b_need_scatter_buffer(struct gk20a *g)
 {
-	return g->mm.bypass_smmu;
+	return !nvgpu_iommuable(g);
 }

 static u8 parity(u32 a)
@@ -332,11 +332,6 @@ void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
 					l->debugfs,
 					&g->timeouts_enabled);

-	l->debugfs_bypass_smmu =
-		debugfs_create_bool("bypass_smmu",
-				    S_IRUGO,
-				    l->debugfs,
-				    &g->mm.bypass_smmu);
 	l->debugfs_disable_bigpage =
 		debugfs_create_file("disable_bigpage",
 				    S_IRUGO|S_IWUSR,
@@ -637,8 +637,13 @@ bool nvgpu_iommuable(struct gk20a *g)
 #ifdef CONFIG_TEGRA_GK20A
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

-	return device_is_iommuable(l->dev);
-#else
-	return true;
+	/*
+	 * Check against the nvgpu device to see if it's been marked as
+	 * IOMMU'able.
+	 */
+	if (!device_is_iommuable(l->dev))
+		return false;
 #endif
+
+	return true;
 }
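The commit message points at this function as the single place where future conditions would go: once NVLINK comes online, nvgpu_iommuable() could teach the driver to prefer raw scatter-gather lists over IOMMU'ed DMA addresses. A hypothetical sketch of such an extension, assuming a placeholder predicate nvgpu_nvlink_active() that does not exist in this tree:

/*
 * Hypothetical sketch only -- not part of this patch. It shows how a
 * future NVLINK condition could be folded into nvgpu_iommuable().
 * nvgpu_nvlink_active() is a placeholder predicate, not a real nvgpu API.
 */
bool nvgpu_iommuable(struct gk20a *g)
{
#ifdef CONFIG_TEGRA_GK20A
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (!device_is_iommuable(l->dev))
		return false;
#endif

	/*
	 * When NVLINK is online, the GPU should stop using IOMMU'ed DMA
	 * addresses and use scatter-gather lists directly.
	 */
	if (nvgpu_nvlink_active(g))
		return false;

	return true;
}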
@@ -184,7 +184,6 @@ static void nvgpu_init_mm_vars(struct gk20a *g)
 {
 	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

-	g->mm.bypass_smmu = platform->bypass_smmu;
 	g->mm.disable_bigpage = platform->disable_bigpage;
 	__nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE,
 			    platform->honors_aperture);
@@ -123,7 +123,6 @@ struct nvgpu_os_linux {
 	struct dentry *debugfs_ltc_enabled;
 	struct dentry *debugfs_timeouts_enabled;
 	struct dentry *debugfs_gr_idle_timeout_default;
-	struct dentry *debugfs_bypass_smmu;
 	struct dentry *debugfs_disable_bigpage;
 	struct dentry *debugfs_gr_default_attrib_cb_size;
@@ -106,9 +106,6 @@ struct gk20a_platform {
 	/* Timeout for per-channel watchdog (in mS) */
 	u32 ch_wdt_timeout_ms;

-	/* Enable SMMU bypass by default */
-	bool bypass_smmu;
-
 	/* Disable big page support */
 	bool disable_bigpage;
@@ -137,8 +137,7 @@ static int gp10b_tegra_probe(struct device *dev)
 		return ret;
 #endif

-	platform->bypass_smmu = !device_is_iommuable(dev);
-	platform->disable_bigpage = platform->bypass_smmu;
+	platform->disable_bigpage = !device_is_iommuable(dev);

 	platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
 		= false;
@@ -81,8 +81,7 @@ static int gv11b_tegra_probe(struct device *dev)
 	g->has_syncpoints = false;
 #endif

-	platform->bypass_smmu = !device_is_iommuable(dev);
-	platform->disable_bigpage = platform->bypass_smmu;
+	platform->disable_bigpage = !device_is_iommuable(dev);

 	platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
 		= false;
@@ -23,10 +23,10 @@
 #include "gk20a/mm_gk20a.h"

 #include <nvgpu/bug.h>
 #include <nvgpu/dma.h>

 int vgpu_gp10b_init_mm_setup_hw(struct gk20a *g)
 {
-	g->mm.bypass_smmu = true;
 	g->mm.disable_bigpage = true;
 	return 0;
 }
@@ -77,7 +77,7 @@ u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,

 	/* FIXME: add support for sparse mappings */

-	if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu))
+	if (WARN_ON(!sgt) || WARN_ON(nvgpu_iommuable(g)))
 		return 0;

 	if (space_to_skip & (page_size - 1))
@@ -501,7 +501,7 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 	 * mapping is simple since the "physical" address is actually a virtual
 	 * IO address and will be contiguous.
 	 */
-	if (attrs->aperture == APERTURE_SYSMEM && !g->mm.bypass_smmu) {
+	if (attrs->aperture == APERTURE_SYSMEM && nvgpu_iommuable(g)) {
 		u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs);

 		io_addr += space_to_skip;
@@ -96,7 +96,7 @@ u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt)
 	 * and double check length of buffer later. Also, since there's an
 	 * IOMMU we know that this DMA address is contiguous.
 	 */
-	if (!g->mm.bypass_smmu &&
+	if (nvgpu_iommuable(g) &&
 	    nvgpu_sgt_iommuable(g, sgt) &&
 	    nvgpu_sgt_get_dma(sgt, sgt->sgl))
 		return 1ULL << __ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl));
@@ -144,7 +144,6 @@ struct mm_gk20a {
 	bool use_full_comp_tag_line;
 	bool ltc_enabled_current;
 	bool ltc_enabled_target;
-	bool bypass_smmu;
 	bool disable_bigpage;
 	bool has_physical_mode;