gpu: nvgpu: Cleanup usage of bypass_smmu

The GPU has multiple different operating modes with respect to IOMMU'ability.
As such there needs to be a clean way to tell the driver whether it is
IOMMU'able or not. This state also does not always reflect what is possible:
just because the GPU can generate IOMMU'ed memory requests doesn't mean it
wants to.

The nvgpu_iommuable() API has existed for a little while now and is a
useful way to convey whether nvgpu should consider the GPU as IOMMU'able.
However, there is also the g->mm.bypass_smmu flag which used to be able to
override what the GPU decided it should do. Typically it was assigned
the same value as nvgpu_iommuable() but that was not necessarily a
requirement.

This patch removes all the usages of g->mm.bypass_smmu and instead uses the
nvgpu_iommuable() function. All checks against g->mm.bypass_smmu have been
replaced with the equivalent (inverted) check of nvgpu_iommuable(). The code
should now be much cleaner.

Subsequently other checks can also be placed in the nvgpu_iommuable()
function. For example, when NVLINK comes online and the GPU should no
longer consider DMA addresses and instead use scatter-gather lists
directly, the nvgpu_iommuable() function will be able to check the state of
NVLINK and then act accordingly.

Change-Id: I0da6262386de15709decac89d63d3eecfec20cd7
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1648332
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Alex Waterman
2018-01-29 16:21:30 -08:00
committed by mobile promotions
parent ac5b3d9640
commit 98da3f8eed
12 changed files with 18 additions and 25 deletions

View File

@@ -26,6 +26,7 @@
#include "cde_gp10b.h" #include "cde_gp10b.h"
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include <nvgpu/dma.h>
enum gp10b_programs { enum gp10b_programs {
GP10B_PROG_HPASS = 0, GP10B_PROG_HPASS = 0,
@@ -56,10 +57,10 @@ void gp10b_cde_get_program_numbers(struct gk20a *g,
hprog = GP10B_PROG_HPASS_DEBUG; hprog = GP10B_PROG_HPASS_DEBUG;
vprog = GP10B_PROG_VPASS_DEBUG; vprog = GP10B_PROG_VPASS_DEBUG;
} }
if (g->mm.bypass_smmu) { if (!nvgpu_iommuable(g)) {
if (!g->mm.disable_bigpage) { if (!g->mm.disable_bigpage) {
nvgpu_warn(g, nvgpu_warn(g,
"when bypass_smmu is 1, disable_bigpage must be 1 too"); "When no IOMMU big pages cannot be used");
} }
hprog |= 1; hprog |= 1;
vprog |= 1; vprog |= 1;
@@ -72,7 +73,7 @@ void gp10b_cde_get_program_numbers(struct gk20a *g,
bool gp10b_need_scatter_buffer(struct gk20a *g) bool gp10b_need_scatter_buffer(struct gk20a *g)
{ {
return g->mm.bypass_smmu; return !nvgpu_iommuable(g);
} }
static u8 parity(u32 a) static u8 parity(u32 a)

View File

@@ -332,11 +332,6 @@ void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
l->debugfs, l->debugfs,
&g->timeouts_enabled); &g->timeouts_enabled);
l->debugfs_bypass_smmu =
debugfs_create_bool("bypass_smmu",
S_IRUGO,
l->debugfs,
&g->mm.bypass_smmu);
l->debugfs_disable_bigpage = l->debugfs_disable_bigpage =
debugfs_create_file("disable_bigpage", debugfs_create_file("disable_bigpage",
S_IRUGO|S_IWUSR, S_IRUGO|S_IWUSR,

View File

@@ -637,8 +637,13 @@ bool nvgpu_iommuable(struct gk20a *g)
#ifdef CONFIG_TEGRA_GK20A #ifdef CONFIG_TEGRA_GK20A
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
return device_is_iommuable(l->dev); /*
#else * Check against the nvgpu device to see if it's been marked as
return true; * IOMMU'able.
*/
if (!device_is_iommuable(l->dev))
return false;
#endif #endif
return true;
} }

View File

@@ -184,7 +184,6 @@ static void nvgpu_init_mm_vars(struct gk20a *g)
{ {
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
g->mm.bypass_smmu = platform->bypass_smmu;
g->mm.disable_bigpage = platform->disable_bigpage; g->mm.disable_bigpage = platform->disable_bigpage;
__nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE, __nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE,
platform->honors_aperture); platform->honors_aperture);

View File

@@ -123,7 +123,6 @@ struct nvgpu_os_linux {
struct dentry *debugfs_ltc_enabled; struct dentry *debugfs_ltc_enabled;
struct dentry *debugfs_timeouts_enabled; struct dentry *debugfs_timeouts_enabled;
struct dentry *debugfs_gr_idle_timeout_default; struct dentry *debugfs_gr_idle_timeout_default;
struct dentry *debugfs_bypass_smmu;
struct dentry *debugfs_disable_bigpage; struct dentry *debugfs_disable_bigpage;
struct dentry *debugfs_gr_default_attrib_cb_size; struct dentry *debugfs_gr_default_attrib_cb_size;

View File

@@ -106,9 +106,6 @@ struct gk20a_platform {
/* Timeout for per-channel watchdog (in mS) */ /* Timeout for per-channel watchdog (in mS) */
u32 ch_wdt_timeout_ms; u32 ch_wdt_timeout_ms;
/* Enable SMMU bypass by default */
bool bypass_smmu;
/* Disable big page support */ /* Disable big page support */
bool disable_bigpage; bool disable_bigpage;

View File

@@ -137,8 +137,7 @@ static int gp10b_tegra_probe(struct device *dev)
return ret; return ret;
#endif #endif
platform->bypass_smmu = !device_is_iommuable(dev); platform->disable_bigpage = !device_is_iommuable(dev);
platform->disable_bigpage = platform->bypass_smmu;
platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
= false; = false;

View File

@@ -81,8 +81,7 @@ static int gv11b_tegra_probe(struct device *dev)
g->has_syncpoints = false; g->has_syncpoints = false;
#endif #endif
platform->bypass_smmu = !device_is_iommuable(dev); platform->disable_bigpage = !device_is_iommuable(dev);
platform->disable_bigpage = platform->bypass_smmu;
platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
= false; = false;

View File

@@ -23,10 +23,10 @@
#include "gk20a/mm_gk20a.h" #include "gk20a/mm_gk20a.h"
#include <nvgpu/bug.h> #include <nvgpu/bug.h>
#include <nvgpu/dma.h>
int vgpu_gp10b_init_mm_setup_hw(struct gk20a *g) int vgpu_gp10b_init_mm_setup_hw(struct gk20a *g)
{ {
g->mm.bypass_smmu = true;
g->mm.disable_bigpage = true; g->mm.disable_bigpage = true;
return 0; return 0;
} }
@@ -77,7 +77,7 @@ u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
/* FIXME: add support for sparse mappings */ /* FIXME: add support for sparse mappings */
if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu)) if (WARN_ON(!sgt) || WARN_ON(nvgpu_iommuable(g)))
return 0; return 0;
if (space_to_skip & (page_size - 1)) if (space_to_skip & (page_size - 1))

View File

@@ -501,7 +501,7 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
* mapping is simple since the "physical" address is actually a virtual * mapping is simple since the "physical" address is actually a virtual
* IO address and will be contiguous. * IO address and will be contiguous.
*/ */
if (attrs->aperture == APERTURE_SYSMEM && !g->mm.bypass_smmu) { if (attrs->aperture == APERTURE_SYSMEM && nvgpu_iommuable(g)) {
u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs); u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs);
io_addr += space_to_skip; io_addr += space_to_skip;

View File

@@ -96,7 +96,7 @@ u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt)
* and double check length of buffer later. Also, since there's an * and double check length of buffer later. Also, since there's an
* IOMMU we know that this DMA address is contiguous. * IOMMU we know that this DMA address is contiguous.
*/ */
if (!g->mm.bypass_smmu && if (nvgpu_iommuable(g) &&
nvgpu_sgt_iommuable(g, sgt) && nvgpu_sgt_iommuable(g, sgt) &&
nvgpu_sgt_get_dma(sgt, sgt->sgl)) nvgpu_sgt_get_dma(sgt, sgt->sgl))
return 1ULL << __ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl)); return 1ULL << __ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl));

View File

@@ -144,7 +144,6 @@ struct mm_gk20a {
bool use_full_comp_tag_line; bool use_full_comp_tag_line;
bool ltc_enabled_current; bool ltc_enabled_current;
bool ltc_enabled_target; bool ltc_enabled_target;
bool bypass_smmu;
bool disable_bigpage; bool disable_bigpage;
bool has_physical_mode; bool has_physical_mode;