gpu: nvgpu: allocate from coherent pool

Maps memory coherently on devices that are connected to a coherent bus. (1) Add code to be able to get the platform device node. (2) Create a new flag to mark if the device is connected to a coherent bus. (3) Map memory coherently on coherent devices. bug 2040331 Change-Id: Ide83a9261acdbbc6e9fef4fc5f38d6f9d0e5ab5b Signed-off-by: David Nieto <dmartineznie@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1633985 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: Alex Waterman <alexw@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-23 18:16:01 +03:00 · 2018-01-04 11:09:57 -08:00
parent 82f253b7c1
commit 6dde9e67d9
7 changed files with 38 additions and 4 deletions
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -24,6 +24,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
 #include <linux/platform/tegra/common.h>
+#include <linux/pci.h>
+
 #include <uapi/linux/nvgpu.h>
 #include <dt-bindings/soc/gm20b-fuse.h>
 #include <dt-bindings/soc/gp10b-fuse.h>
@@ -70,6 +72,23 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/gk20a.h>

+/* OF node for @g; PCI devices use the node of the root bus bridge's parent. */
+struct device_node *nvgpu_get_node(struct gk20a *g)
+{
+	struct device *dev = dev_from_gk20a(g);
+
+	if (dev_is_pci(dev)) {
+		struct pci_bus *bus = to_pci_dev(dev)->bus;
+
+		while (!pci_is_root_bus(bus))
+			bus = bus->parent;
+		/* NOTE(review): bridge->parent is not NULL-checked; confirm. */
+		return bus->bridge->parent->of_node;
+	}
+
+	return dev->of_node;
+}
+
 void gk20a_busy_noresume(struct gk20a *g)
 {
 	pm_runtime_get_noresume(dev_from_gk20a(g));
@@ -1042,7 +1061,7 @@ static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a)

 static int nvgpu_read_fuse_overrides(struct gk20a *g)
 {
-	struct device_node *np = dev_from_gk20a(g)->of_node;
+	struct device_node *np = nvgpu_get_node(g);
 	u32 *fuses;
 	int count, i;

--- a/drivers/gpu/nvgpu/common/linux/module.h
+++ b/drivers/gpu/nvgpu/common/linux/module.h
@@ -28,6 +28,7 @@ void gk20a_driver_start_unload(struct gk20a *g);
 int nvgpu_quiesce(struct gk20a *g);
 int nvgpu_remove(struct device *dev, struct class *class);
 void nvgpu_free_irq(struct gk20a *g);
+struct device_node *nvgpu_get_node(struct gk20a *g);

 extern struct class nvgpu_class;

--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -61,6 +61,8 @@ u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
 int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 {
 	void *cpu_va;
+	pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL :
+		pgprot_writecombine(PAGE_KERNEL);

 	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
 		return 0;
@@ -80,7 +82,7 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)

 	cpu_va = vmap(mem->priv.pages,
 			PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
+			0, prot);

 	if (WARN_ON(!cpu_va))
 		return -ENOMEM;
--- a/drivers/gpu/nvgpu/common/linux/nvhost.c
+++ b/drivers/gpu/nvgpu/common/linux/nvhost.c
@@ -24,10 +24,11 @@

 #include "gk20a/gk20a.h"
 #include "os_linux.h"
+#include "module.h"

 int nvgpu_get_nvhost_dev(struct gk20a *g)
 {
-	struct device_node *np = dev_from_gk20a(g)->of_node;
+	struct device_node *np = nvgpu_get_node(g);
 	struct platform_device *host1x_pdev = NULL;
 	const __be32 *host1x_ptr;

--- a/drivers/gpu/nvgpu/common/linux/pci.c
+++ b/drivers/gpu/nvgpu/common/linux/pci.c
@@ -21,6 +21,8 @@
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/enabled.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>

 #include "gk20a/gk20a.h"
 #include "clk/clk.h"
@@ -525,6 +527,7 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
 	struct gk20a *g;
 	int err;
 	char nodefmt[64];
+	struct device_node *np;

 	/* make sure driver_data is a sane index */
 	if (pent->driver_data >= sizeof(nvgpu_pci_device) /
@@ -632,6 +635,11 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,

 	g->mm.has_physical_mode = false;

+	np = nvgpu_get_node(g);
+
+	if (of_dma_is_coherent(np))
+		__nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true);
+
 	return 0;
 }

--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -540,7 +540,7 @@ static const struct gpu_ops gv100_ops = {
 		.vm_bind_channel = gk20a_vm_bind_channel,
 		.fb_flush = gk20a_mm_fb_flush,
 		.l2_invalidate = gk20a_mm_l2_invalidate,
-		.l2_flush = gk20a_mm_l2_flush,
+		.l2_flush = gv11b_mm_l2_flush,
 		.cbc_clean = gk20a_mm_cbc_clean,
 		.set_big_page_size = gm20b_mm_set_big_page_size,
 		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -75,6 +75,9 @@ struct gk20a;
 #define NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL	24
 /* Support batch mapping */
 #define NVGPU_SUPPORT_MAP_BUFFER_BATCH		25
+/* Device is connected to a DMA-coherent bus */
+#define NVGPU_DMA_COHERENT			26
+

 /*
 * Host flags