gpu: nvgpu: split vidmem_is_vidmem

Because the vidmem_is_vidmem flag carries two separate meanings in a
single bit, split it into two separate bits in the enabled() API:

Add NVGPU_MM_HONORS_APERTURE bit, which is the same as vidmem_is_vidmem
with its original meaning, and use it to test which aperture bits to
write to hardware.

Add NVGPU_MM_UNIFIED_MEMORY bit, which inverts the flag's other meaning:
when set, the GPU shares the SoC memory. When this flag is false, the
GPU has its own local video memory.

Jira NVGPU-86

Change-Id: I2d0bed3b1ede5a712be99323d3035b154bb23c3a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1496080
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Holtta
2017-06-05 17:40:44 +03:00
committed by mobile promotions
parent 26487b82df
commit 80197d2c9d
11 changed files with 33 additions and 25 deletions

View File

@@ -21,6 +21,7 @@
#include <nvgpu/lock.h> #include <nvgpu/lock.h>
#include <nvgpu/bug.h> #include <nvgpu/bug.h>
#include <nvgpu/gmmu.h> #include <nvgpu/gmmu.h>
#include <nvgpu/enabled.h>
#include <nvgpu/linux/dma.h> #include <nvgpu/linux/dma.h>
@@ -69,7 +70,7 @@ int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
struct nvgpu_mem *mem) struct nvgpu_mem *mem)
{ {
if (g->mm.vidmem_is_vidmem) { if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
/* /*
* Force the no-kernel-mapping flag on because we don't support * Force the no-kernel-mapping flag on because we don't support
* the lack of it for vidmem - the user should not care when * the lack of it for vidmem - the user should not care when
@@ -251,7 +252,7 @@ int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
size_t size, struct nvgpu_mem *mem) size_t size, struct nvgpu_mem *mem)
{ {
if (vm->mm->vidmem_is_vidmem) { if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) {
/* /*
* Force the no-kernel-mapping flag on because we don't support * Force the no-kernel-mapping flag on because we don't support
* the lack of it for vidmem - the user should not care when * the lack of it for vidmem - the user should not care when

View File

@@ -133,8 +133,10 @@ static void nvgpu_init_mm_vars(struct gk20a *g)
g->mm.bypass_smmu = platform->bypass_smmu; g->mm.bypass_smmu = platform->bypass_smmu;
g->mm.disable_bigpage = platform->disable_bigpage; g->mm.disable_bigpage = platform->disable_bigpage;
g->mm.vidmem_is_vidmem = platform->vidmem_is_vidmem; __nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE,
platform->honors_aperture);
__nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY,
platform->unified_memory);
__nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
platform->unify_address_spaces); platform->unify_address_spaces);

View File

@@ -19,6 +19,7 @@
#include <nvgpu/page_allocator.h> #include <nvgpu/page_allocator.h>
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include <nvgpu/bug.h> #include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/linux/dma.h> #include <nvgpu/linux/dma.h>
@@ -30,8 +31,9 @@ u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
{ {
switch (aperture) { switch (aperture) {
case APERTURE_SYSMEM: case APERTURE_SYSMEM:
/* sysmem for dgpus; some igpus consider system memory vidmem */ /* some igpus consider system memory vidmem */
return g->mm.vidmem_is_vidmem ? sysmem_mask : vidmem_mask; return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
? sysmem_mask : vidmem_mask;
case APERTURE_VIDMEM: case APERTURE_VIDMEM:
/* for dgpus only */ /* for dgpus only */
return vidmem_mask; return vidmem_mask;

View File

@@ -86,7 +86,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.ch_wdt_timeout_ms = 7000, .ch_wdt_timeout_ms = 7000,
.vidmem_is_vidmem = true, .honors_aperture = true,
.vbios_min_version = 0x86063000, .vbios_min_version = 0x86063000,
.hardcode_sw_threshold = true, .hardcode_sw_threshold = true,
.ina3221_dcb_index = 0, .ina3221_dcb_index = 0,
@@ -121,7 +121,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.ch_wdt_timeout_ms = 7000, .ch_wdt_timeout_ms = 7000,
.vidmem_is_vidmem = true, .honors_aperture = true,
.vbios_min_version = 0x86062d00, .vbios_min_version = 0x86062d00,
.hardcode_sw_threshold = true, .hardcode_sw_threshold = true,
.ina3221_dcb_index = 0, .ina3221_dcb_index = 0,
@@ -156,7 +156,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.ch_wdt_timeout_ms = 7000, .ch_wdt_timeout_ms = 7000,
.vidmem_is_vidmem = true, .honors_aperture = true,
.vbios_min_version = 0x86063000, .vbios_min_version = 0x86063000,
.hardcode_sw_threshold = true, .hardcode_sw_threshold = true,
.ina3221_dcb_index = 0, .ina3221_dcb_index = 0,
@@ -191,7 +191,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.ch_wdt_timeout_ms = 7000, .ch_wdt_timeout_ms = 7000,
.vidmem_is_vidmem = true, .honors_aperture = true,
.vbios_min_version = 0x86065600, .vbios_min_version = 0x86065600,
.hardcode_sw_threshold = false, .hardcode_sw_threshold = false,
.ina3221_dcb_index = 1, .ina3221_dcb_index = 1,

View File

@@ -665,7 +665,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
* this requires fixed allocations in vidmem which must be * this requires fixed allocations in vidmem which must be
* allocated before all other buffers * allocated before all other buffers
*/ */
if (g->ops.pmu.alloc_blob_space && g->mm.vidmem_is_vidmem) { if (g->ops.pmu.alloc_blob_space
&& !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob); err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
if (err) if (err)
return err; return err;
@@ -1234,10 +1235,12 @@ enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
struct dma_buf *dmabuf) struct dma_buf *dmabuf)
{ {
struct gk20a *buf_owner = gk20a_vidmem_buf_owner(dmabuf); struct gk20a *buf_owner = gk20a_vidmem_buf_owner(dmabuf);
bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY);
if (buf_owner == NULL) { if (buf_owner == NULL) {
/* Not nvgpu-allocated, assume system memory */ /* Not nvgpu-allocated, assume system memory */
return APERTURE_SYSMEM; return APERTURE_SYSMEM;
} else if (WARN_ON(buf_owner == g && !g->mm.vidmem_is_vidmem)) { } else if (WARN_ON(buf_owner == g && unified_memory)) {
/* Looks like our video memory, but this gpu doesn't support /* Looks like our video memory, but this gpu doesn't support
* it. Warn about a bug and bail out */ * it. Warn about a bug and bail out */
nvgpu_warn(g, nvgpu_warn(g,

View File

@@ -256,8 +256,6 @@ struct mm_gk20a {
bool disable_bigpage; bool disable_bigpage;
#endif #endif
bool has_physical_mode; bool has_physical_mode;
/* false if vidmem aperture actually points to sysmem */
bool vidmem_is_vidmem;
struct nvgpu_mem sysmem_flush; struct nvgpu_mem sysmem_flush;

View File

@@ -215,8 +215,10 @@ struct gk20a_platform {
/* soc name for finding firmware files */ /* soc name for finding firmware files */
const char *soc_name; const char *soc_name;
/* if vidmem aperture actually points to vidmem*/ /* false if vidmem aperture actually points to sysmem */
bool vidmem_is_vidmem; bool honors_aperture;
/* unified or split memory with separate vidmem? */
bool unified_memory;
/* minimum supported VBIOS version */ /* minimum supported VBIOS version */
u32 vbios_min_version; u32 vbios_min_version;

View File

@@ -68,8 +68,8 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c)
nvgpu_mem_wr32(g, &c->inst_block, nvgpu_mem_wr32(g, &c->inst_block,
ram_in_ramfc_w() + ram_fc_userd_w(), ram_in_ramfc_w() + ram_fc_userd_w(),
(g->mm.vidmem_is_vidmem ? nvgpu_aperture_mask(g, &g->fifo.userd,
pbdma_userd_target_sys_mem_ncoh_f() : pbdma_userd_target_sys_mem_ncoh_f(),
pbdma_userd_target_vid_mem_f()) | pbdma_userd_target_vid_mem_f()) |
pbdma_userd_addr_f(addr_lo)); pbdma_userd_addr_f(addr_lo));

View File

@@ -32,6 +32,10 @@ struct gk20a;
* MM flags. * MM flags.
*/ */
#define NVGPU_MM_UNIFY_ADDRESS_SPACES 16 #define NVGPU_MM_UNIFY_ADDRESS_SPACES 16
/* false if vidmem aperture actually points to sysmem */
#define NVGPU_MM_HONORS_APERTURE 17
/* unified or split memory with separate vidmem? */
#define NVGPU_MM_UNIFIED_MEMORY 18
/* /*
* Must be greater than the largest bit offset in the above list. * Must be greater than the largest bit offset in the above list.

View File

@@ -934,8 +934,6 @@ static int gk20a_tegra_probe(struct device *dev)
if (tegra_get_chip_id() == TEGRA132) if (tegra_get_chip_id() == TEGRA132)
platform->soc_name = "tegra13x"; platform->soc_name = "tegra13x";
platform->g->mm.vidmem_is_vidmem = platform->vidmem_is_vidmem;
gk20a_tegra_get_clocks(dev); gk20a_tegra_get_clocks(dev);
nvgpu_linux_init_clk_support(platform->g); nvgpu_linux_init_clk_support(platform->g);
gk20a_tegra_init_secure_alloc(platform->g); gk20a_tegra_init_secure_alloc(platform->g);
@@ -1051,7 +1049,7 @@ struct gk20a_platform gk20a_tegra_platform = {
.soc_name = "tegra12x", .soc_name = "tegra12x",
.vidmem_is_vidmem = false, .unified_memory = true,
}; };
struct gk20a_platform gm20b_tegra_platform = { struct gk20a_platform gm20b_tegra_platform = {
@@ -1123,5 +1121,5 @@ struct gk20a_platform gm20b_tegra_platform = {
.soc_name = "tegra21x", .soc_name = "tegra21x",
.vidmem_is_vidmem = false, .unified_memory = true,
}; };

View File

@@ -161,8 +161,6 @@ static int gp10b_tegra_probe(struct device *dev)
platform->g->gr.t18x.ctx_vars.force_preemption_gfxp = false; platform->g->gr.t18x.ctx_vars.force_preemption_gfxp = false;
platform->g->gr.t18x.ctx_vars.force_preemption_cilp = false; platform->g->gr.t18x.ctx_vars.force_preemption_cilp = false;
platform->g->mm.vidmem_is_vidmem = platform->vidmem_is_vidmem;
gp10b_tegra_get_clocks(dev); gp10b_tegra_get_clocks(dev);
nvgpu_linux_init_clk_support(platform->g); nvgpu_linux_init_clk_support(platform->g);
gk20a_tegra_init_secure_alloc(platform->g); gk20a_tegra_init_secure_alloc(platform->g);
@@ -436,7 +434,7 @@ struct gk20a_platform gp10b_tegra_platform = {
.soc_name = "tegra18x", .soc_name = "tegra18x",
.vidmem_is_vidmem = false, .unified_memory = true,
}; };