gpu: nvgpu: allocate from coherent pool

Maps memory coherently on devices that are connected to a coherent bus.

(1) Add code to be able to get the platform device node.
(2) Create a new flag to mark if the device is connected to a coherent bus
(3) Map memory coherently on coherent devices.

bug 2040331

Change-Id: Ide83a9261acdbbc6e9fef4fc5f38d6f9d0e5ab5b
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1633985
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
David Nieto
2018-01-04 11:09:57 -08:00
committed by mobile promotions
parent 82f253b7c1
commit 6dde9e67d9
7 changed files with 38 additions and 4 deletions

View File

@@ -24,6 +24,8 @@
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/platform/tegra/common.h>
#include <linux/pci.h>
#include <uapi/linux/nvgpu.h>
#include <dt-bindings/soc/gm20b-fuse.h>
#include <dt-bindings/soc/gp10b-fuse.h>
@@ -70,6 +72,23 @@
#define CREATE_TRACE_POINTS
#include <trace/events/gk20a.h>
struct device_node *nvgpu_get_node(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
if (dev_is_pci(dev)) {
struct pci_bus *bus = to_pci_dev(dev)->bus;
while (!pci_is_root_bus(bus))
bus = bus->parent;
return bus->bridge->parent->of_node;
}
return dev->of_node;
}
void gk20a_busy_noresume(struct gk20a *g)
{
pm_runtime_get_noresume(dev_from_gk20a(g));
@@ -1042,7 +1061,7 @@ static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a)
static int nvgpu_read_fuse_overrides(struct gk20a *g)
{
struct device_node *np = dev_from_gk20a(g)->of_node;
struct device_node *np = nvgpu_get_node(g);
u32 *fuses;
int count, i;

View File

@@ -28,6 +28,7 @@ void gk20a_driver_start_unload(struct gk20a *g);
int nvgpu_quiesce(struct gk20a *g);
int nvgpu_remove(struct device *dev, struct class *class);
void nvgpu_free_irq(struct gk20a *g);
struct device_node *nvgpu_get_node(struct gk20a *g);
extern struct class nvgpu_class;

View File

@@ -61,6 +61,8 @@ u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
void *cpu_va;
pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL :
pgprot_writecombine(PAGE_KERNEL);
if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
return 0;
@@ -80,7 +82,7 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
cpu_va = vmap(mem->priv.pages,
PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
0, pgprot_writecombine(PAGE_KERNEL));
0, prot);
if (WARN_ON(!cpu_va))
return -ENOMEM;

View File

@@ -24,10 +24,11 @@
#include "gk20a/gk20a.h"
#include "os_linux.h"
#include "module.h"
int nvgpu_get_nvhost_dev(struct gk20a *g)
{
struct device_node *np = dev_from_gk20a(g)->of_node;
struct device_node *np = nvgpu_get_node(g);
struct platform_device *host1x_pdev = NULL;
const __be32 *host1x_ptr;

View File

@@ -21,6 +21,8 @@
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/enabled.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include "gk20a/gk20a.h"
#include "clk/clk.h"
@@ -525,6 +527,7 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
struct gk20a *g;
int err;
char nodefmt[64];
struct device_node *np;
/* make sure driver_data is a sane index */
if (pent->driver_data >= sizeof(nvgpu_pci_device) /
@@ -632,6 +635,11 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
g->mm.has_physical_mode = false;
np = nvgpu_get_node(g);
if (of_dma_is_coherent(np))
__nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true);
return 0;
}

View File

@@ -540,7 +540,7 @@ static const struct gpu_ops gv100_ops = {
.vm_bind_channel = gk20a_vm_bind_channel,
.fb_flush = gk20a_mm_fb_flush,
.l2_invalidate = gk20a_mm_l2_invalidate,
.l2_flush = gk20a_mm_l2_flush,
.l2_flush = gv11b_mm_l2_flush,
.cbc_clean = gk20a_mm_cbc_clean,
.set_big_page_size = gm20b_mm_set_big_page_size,
.get_big_page_sizes = gm20b_mm_get_big_page_sizes,

View File

@@ -75,6 +75,9 @@ struct gk20a;
#define NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL 24
/* Support batch mapping */
#define NVGPU_SUPPORT_MAP_BUFFER_BATCH 25
/* Support DMA coherence */
#define NVGPU_DMA_COHERENT 26
/*
* Host flags