Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-22 17:36:20 +03:00)
gpu: nvgpu: userd allocation from sysmem
When BAR1 memory is not supported, userd is allocated from sysmem
instead. The gp_get and gp_put functions are updated accordingly.

JIRA GV11B-1

Change-Id: Ia895712a110f6cca26474228141488f5f8ace756
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1225384
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: bb5fd16c67
Commit: fda4ddfa79
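In outline, the change gates USERD placement and every BAR1 touch behind a new mm.is_bar1_supported HAL op, and moves GP_GET/GP_PUT accesses behind new fifo.userd_gp_get/userd_gp_put ops so a BAR1-less chip can supply sysmem-backed versions. A condensed sketch of the resulting allocation decision, using only names that appear in the diff below (the wrapper function name is mine; error logging trimmed; illustrative, not the verbatim source):

        /* Sketch of the USERD backing-store selection introduced below.
         * 'userd_alloc_sketch' is a hypothetical wrapper; the callees and
         * fields are the ones the diff itself uses. */
        static int userd_alloc_sketch(struct gk20a *g, struct fifo_gk20a *f)
        {
                if (g->ops.mm.is_bar1_supported(g))
                        /* BAR1 present: allocate sysmem and map it into the
                         * BAR1 VM, as before this change */
                        return gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm,
                                        f->userd_entry_size * f->num_channels,
                                        &f->userd);
                /* no BAR1: plain sysmem allocation, no BAR1 mapping */
                return gk20a_gmmu_alloc_sys(g,
                                f->userd_entry_size * f->num_channels,
                                &f->userd);
        }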
drivers/gpu/nvgpu/gk20a/channel_gk20a.c:

@@ -1541,12 +1541,25 @@ clean_up:
         return err;
 }
 
+u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
+{
+        return gk20a_bar1_readl(g,
+                c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
+}
+
+void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
+{
+        gk20a_bar1_writel(g,
+                c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
+                c->gpfifo.put);
+}
+
 /* Update with this periodically to determine how the gpfifo is draining. */
 static inline u32 update_gp_get(struct gk20a *g,
                 struct channel_gk20a *c)
 {
-        u32 new_get = gk20a_bar1_readl(g,
-                c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
+        u32 new_get = g->ops.fifo.userd_gp_get(g, c);
 
         if (new_get < c->gpfifo.get)
                 c->gpfifo.wrap = !c->gpfifo.wrap;
         c->gpfifo.get = new_get;
@@ -2360,9 +2373,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                 wait_cmd, incr_cmd,
                                 skip_buffer_refcounting);
 
-        gk20a_bar1_writel(g,
-                c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
-                c->gpfifo.put);
+        g->ops.fifo.userd_gp_put(g, c);
 
         trace_gk20a_channel_submitted_gpfifo(dev_name(c->g->dev),
                                 c->hw_chid,
@@ -2988,6 +2999,8 @@ void gk20a_init_channel(struct gpu_ops *gops)
         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
         gops->fifo.channel_set_priority = gk20a_channel_set_priority;
         gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice;
+        gops->fifo.userd_gp_get = gk20a_userd_gp_get;
+        gops->fifo.userd_gp_put = gk20a_userd_gp_put;
 }
 
 long gk20a_channel_ioctl(struct file *filp,
drivers/gpu/nvgpu/gk20a/fifo_gk20a.c:

@@ -491,7 +491,10 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 
         vfree(f->channel);
         vfree(f->tsg);
-        gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
+        if (g->ops.mm.is_bar1_supported(g))
+                gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
+        else
+                gk20a_gmmu_free(g, &f->userd);
 
         gk20a_fifo_delete_runlist(f);
@@ -797,16 +800,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 
         f->userd_entry_size = 1 << ram_userd_base_shift_v();
 
-        err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm,
-                        f->userd_entry_size * f->num_channels,
-                        &f->userd);
-        if (err) {
-                dev_err(d, "memory allocation failed\n");
-                goto clean_up;
-        }
-
-        gk20a_dbg(gpu_dbg_map_v, "userd bar1 va = 0x%llx", f->userd.gpu_va);
-
         f->channel = vzalloc(f->num_channels * sizeof(*f->channel));
         f->tsg = vzalloc(f->num_channels * sizeof(*f->tsg));
         f->pbdma_map = kzalloc(f->num_pbdma * sizeof(*f->pbdma_map),
@@ -834,13 +827,26 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
         INIT_LIST_HEAD(&f->free_chs);
         mutex_init(&f->free_chs_mutex);
 
+        if (g->ops.mm.is_bar1_supported(g))
+                err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm,
+                                f->userd_entry_size * f->num_channels,
+                                &f->userd);
+
+        else
+                err = gk20a_gmmu_alloc_sys(g, f->userd_entry_size *
+                                f->num_channels, &f->userd);
+        if (err) {
+                dev_err(d, "userd memory allocation failed\n");
+                goto clean_up;
+        }
+        gk20a_dbg(gpu_dbg_map, "userd gpu va = 0x%llx", f->userd.gpu_va);
+
         for (chid = 0; chid < f->num_channels; chid++) {
                 f->channel[chid].userd_iova =
                         g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0)
-                                + chid * f->userd_entry_size;
+                        + chid * f->userd_entry_size;
                 f->channel[chid].userd_gpu_va =
                         f->userd.gpu_va + chid * f->userd_entry_size;
 
                 gk20a_init_channel_support(g, chid);
                 gk20a_init_tsg_support(g, chid);
         }
@@ -858,7 +864,10 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 
 clean_up:
         gk20a_dbg_fn("fail");
-        gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
+        if (g->ops.mm.is_bar1_supported(g))
+                gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
+        else
+                gk20a_gmmu_free(g, &f->userd);
 
         vfree(f->channel);
         f->channel = NULL;
@@ -884,7 +893,7 @@ static void gk20a_fifo_handle_runlist_event(struct gk20a *g)
         gk20a_writel(g, fifo_intr_runlist_r(), runlist_event);
 }
 
-static int gk20a_init_fifo_setup_hw(struct gk20a *g)
+int gk20a_init_fifo_setup_hw(struct gk20a *g)
 {
         struct fifo_gk20a *f = &g->fifo;
 
@@ -952,7 +961,8 @@ int gk20a_init_fifo_support(struct gk20a *g)
         if (err)
                 return err;
 
-        err = gk20a_init_fifo_setup_hw(g);
+        if (g->ops.fifo.init_fifo_setup_hw)
+                err = g->ops.fifo.init_fifo_setup_hw(g);
         if (err)
                 return err;
 
@@ -3015,7 +3025,8 @@ int gk20a_fifo_suspend(struct gk20a *g)
         gk20a_dbg_fn("");
 
         /* stop bar1 snooping */
-        gk20a_writel(g, fifo_bar1_base_r(),
+        if (g->ops.mm.is_bar1_supported(g))
+                gk20a_writel(g, fifo_bar1_base_r(),
                         fifo_bar1_base_valid_false_f());
 
         /* disable fifo intr */
@@ -3246,6 +3257,7 @@ void gk20a_fifo_debugfs_init(struct device *dev)
 void gk20a_init_fifo(struct gpu_ops *gops)
 {
         gk20a_init_channel(gops);
+        gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw;
         gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
         gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg;
         gops->fifo.update_runlist = gk20a_fifo_update_runlist;
drivers/gpu/nvgpu/gk20a/fifo_gk20a.h:

@@ -167,6 +167,8 @@ static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
 
 int gk20a_init_fifo_support(struct gk20a *g);
 
+int gk20a_init_fifo_setup_hw(struct gk20a *g);
+
 void gk20a_fifo_isr(struct gk20a *g);
 void gk20a_fifo_nonstall_isr(struct gk20a *g);
 
@@ -263,4 +265,8 @@ int gk20a_fifo_init_engine_info(struct fifo_gk20a *f);
 void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist);
 void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist);
 
+u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c);
+void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c);
+
+
 #endif /*__GR_GK20A_H__*/
drivers/gpu/nvgpu/gk20a/gk20a.h:

@@ -342,6 +342,7 @@ struct gpu_ops {
                 void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod);
         } clock_gating;
         struct {
+                int (*init_fifo_setup_hw)(struct gk20a *g);
                 void (*bind_channel)(struct channel_gk20a *ch_gk20a);
                 void (*unbind_channel)(struct channel_gk20a *ch_gk20a);
                 void (*disable_channel)(struct channel_gk20a *ch);
@@ -386,6 +387,8 @@ struct gpu_ops {
                                 u32 *runlist);
                 void (*get_ch_runlist_entry)(struct channel_gk20a *ch,
                                 u32 *runlist);
+                u32 (*userd_gp_get)(struct gk20a *g, struct channel_gk20a *ch);
+                void (*userd_gp_put)(struct gk20a *g, struct channel_gk20a *ch);
         } fifo;
         struct pmu_v {
                 /*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -552,6 +555,7 @@ struct gpu_ops {
                 u32 (*get_big_page_sizes)(void);
                 u32 (*get_physical_addr_bits)(struct gk20a *g);
                 int (*init_mm_setup_hw)(struct gk20a *g);
+                bool (*is_bar1_supported)(struct gk20a *g);
                 int (*init_bar2_vm)(struct gk20a *g);
                 int (*init_bar2_mm_hw_setup)(struct gk20a *g);
                 void (*remove_bar2_vm)(struct gk20a *g);
drivers/gpu/nvgpu/gk20a/mm_gk20a.c:

@@ -797,7 +797,10 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 
         if (g->ops.mm.remove_bar2_vm)
                 g->ops.mm.remove_bar2_vm(g);
-        gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
+
+        if (g->ops.mm.is_bar1_supported(g))
+                gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
+
         gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
         gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
         gk20a_vm_remove_support_nofree(&mm->cde.vm);
@@ -1001,10 +1004,11 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
         if (err)
                 return err;
 
-        err = gk20a_init_bar1_vm(mm);
-        if (err)
-                return err;
-
+        if (g->ops.mm.is_bar1_supported(g)) {
+                err = gk20a_init_bar1_vm(mm);
+                if (err)
+                        return err;
+        }
         if (g->ops.mm.init_bar2_vm) {
                 err = g->ops.mm.init_bar2_vm(g);
                 if (err)
@@ -1055,7 +1059,8 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
                 g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
                 >> 8);
 
-        g->ops.mm.bar1_bind(g, &mm->bar1.inst_block);
+        if (g->ops.mm.bar1_bind)
+                g->ops.mm.bar1_bind(g, &mm->bar1.inst_block);
 
         if (g->ops.mm.init_bar2_mm_hw_setup) {
                 err = g->ops.mm.init_bar2_mm_hw_setup(g);
@@ -5249,6 +5254,11 @@ clean_up:
         return err;
 }
 
+static bool gk20a_mm_is_bar1_supported(struct gk20a *g)
+{
+        return true;
+}
+
 #ifdef CONFIG_DEBUG_FS
 void gk20a_mm_debugfs_init(struct device *dev)
 {
@@ -5284,4 +5294,5 @@ void gk20a_init_mm(struct gpu_ops *gops)
         gops->mm.init_pdb = gk20a_mm_init_pdb;
         gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw;
         gops->mm.bar1_bind = gk20a_mm_bar1_bind;
+        gops->mm.is_bar1_supported = gk20a_mm_is_bar1_supported;
 }
drivers/gpu/nvgpu/gm20b/fifo_gm20b.c:

@@ -15,6 +15,7 @@
 
 #include <linux/delay.h>
 #include "gk20a/gk20a.h"
+#include "gk20a/fifo_gk20a.h"
 #include "fifo_gm20b.h"
 #include "hw_ccsr_gm20b.h"
 #include "hw_ram_gm20b.h"
@@ -133,6 +134,7 @@ static void gm20b_device_info_data_parse(struct gk20a *g,
 }
 void gm20b_init_fifo(struct gpu_ops *gops)
 {
+        gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw;
         gops->fifo.bind_channel = channel_gm20b_bind;
         gops->fifo.unbind_channel = channel_gk20a_unbind;
         gops->fifo.disable_channel = channel_gk20a_disable;
@@ -142,6 +144,8 @@ void gm20b_init_fifo(struct gpu_ops *gops)
         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
         gops->fifo.channel_set_priority = gk20a_channel_set_priority;
         gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice;
+        gops->fifo.userd_gp_get = gk20a_userd_gp_get;
+        gops->fifo.userd_gp_put = gk20a_userd_gp_put;
 
         gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
         gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg;
drivers/gpu/nvgpu/gm20b/mm_gm20b.c:

@@ -163,6 +163,11 @@ static int gm20b_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst)
         return retry ? -EINVAL : 0;
 }
 
+static bool gm20b_mm_is_bar1_supported(struct gk20a *g)
+{
+        return true;
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
         gops->mm.support_sparse = gm20b_mm_support_sparse;
@@ -186,4 +191,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
         gops->mm.init_pdb = gk20a_mm_init_pdb;
         gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw;
         gops->mm.bar1_bind = gm20b_mm_bar1_bind;
+        gops->mm.is_bar1_supported = gm20b_mm_is_bar1_supported;
 }
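Both gk20a and gm20b keep the BAR1 path above: their is_bar1_supported hooks return true, and the default userd_gp_get/userd_gp_put still go through gk20a_bar1_readl/gk20a_bar1_writel. A chip without BAR1 (the JIRA tag suggests gv11b) would report false and plug in sysmem-backed accessors instead; those implementations are not part of this commit, so the following is only a hypothetical sketch (the accessor name and the userd_cpu_va field, a kernel CPU mapping of the channel's USERD page, are assumptions):

        /* Hypothetical sysmem-backed GP_GET accessor for a BAR1-less chip.
         * Not in this commit: 'sysmem_userd_gp_get' and 'c->userd_cpu_va'
         * are assumed names. ram_userd_gp_get_w() is the same word offset
         * the BAR1 variant uses above; a real implementation would also
         * need an ordered/volatile access rather than a plain load. */
        static u32 sysmem_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
        {
                u32 *userd = (u32 *)c->userd_cpu_va;

                /* plain CPU read of the GP_GET word; no BAR1 involved */
                return userd[ram_userd_gp_get_w()];
        }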