gpu: nvgpu: userd allocation from sysmem

When bar1 memory is not supported, userd will be
allocated from sysmem.

Functions gp_get and gp_put are updated accordingly.

JIRA GV11B-1

Change-Id: Ia895712a110f6cca26474228141488f5f8ace756
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1225384
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
seshendra Gadagottu
2016-07-27 17:06:36 -07:00
committed by mobile promotions
parent bb5fd16c67
commit fda4ddfa79
7 changed files with 84 additions and 28 deletions

View File

@@ -1541,12 +1541,25 @@ clean_up:
return err; return err;
} }
/* Read the channel's current GP_GET pointer from the GP_GET word of its
 * USERD area, accessed through the BAR1 aperture. */
u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
{
return gk20a_bar1_readl(g,
c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
}
/* Publish the cached software put pointer (c->gpfifo.put) to the GP_PUT
 * word of the channel's USERD area, written through the BAR1 aperture. */
void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
{
gk20a_bar1_writel(g,
c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
c->gpfifo.put);
}
/* Update with this periodically to determine how the gpfifo is draining. */ /* Update with this periodically to determine how the gpfifo is draining. */
static inline u32 update_gp_get(struct gk20a *g, static inline u32 update_gp_get(struct gk20a *g,
struct channel_gk20a *c) struct channel_gk20a *c)
{ {
u32 new_get = gk20a_bar1_readl(g, u32 new_get = g->ops.fifo.userd_gp_get(g, c);
c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
if (new_get < c->gpfifo.get) if (new_get < c->gpfifo.get)
c->gpfifo.wrap = !c->gpfifo.wrap; c->gpfifo.wrap = !c->gpfifo.wrap;
c->gpfifo.get = new_get; c->gpfifo.get = new_get;
@@ -2360,9 +2373,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
wait_cmd, incr_cmd, wait_cmd, incr_cmd,
skip_buffer_refcounting); skip_buffer_refcounting);
gk20a_bar1_writel(g, g->ops.fifo.userd_gp_put(g, c);
c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
c->gpfifo.put);
trace_gk20a_channel_submitted_gpfifo(dev_name(c->g->dev), trace_gk20a_channel_submitted_gpfifo(dev_name(c->g->dev),
c->hw_chid, c->hw_chid,
@@ -2988,6 +2999,8 @@ void gk20a_init_channel(struct gpu_ops *gops)
gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
gops->fifo.channel_set_priority = gk20a_channel_set_priority; gops->fifo.channel_set_priority = gk20a_channel_set_priority;
gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice; gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice;
gops->fifo.userd_gp_get = gk20a_userd_gp_get;
gops->fifo.userd_gp_put = gk20a_userd_gp_put;
} }
long gk20a_channel_ioctl(struct file *filp, long gk20a_channel_ioctl(struct file *filp,

View File

@@ -491,7 +491,10 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
vfree(f->channel); vfree(f->channel);
vfree(f->tsg); vfree(f->tsg);
gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); if (g->ops.mm.is_bar1_supported(g))
gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
else
gk20a_gmmu_free(g, &f->userd);
gk20a_fifo_delete_runlist(f); gk20a_fifo_delete_runlist(f);
@@ -797,16 +800,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
f->userd_entry_size = 1 << ram_userd_base_shift_v(); f->userd_entry_size = 1 << ram_userd_base_shift_v();
err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm,
f->userd_entry_size * f->num_channels,
&f->userd);
if (err) {
dev_err(d, "memory allocation failed\n");
goto clean_up;
}
gk20a_dbg(gpu_dbg_map_v, "userd bar1 va = 0x%llx", f->userd.gpu_va);
f->channel = vzalloc(f->num_channels * sizeof(*f->channel)); f->channel = vzalloc(f->num_channels * sizeof(*f->channel));
f->tsg = vzalloc(f->num_channels * sizeof(*f->tsg)); f->tsg = vzalloc(f->num_channels * sizeof(*f->tsg));
f->pbdma_map = kzalloc(f->num_pbdma * sizeof(*f->pbdma_map), f->pbdma_map = kzalloc(f->num_pbdma * sizeof(*f->pbdma_map),
@@ -834,13 +827,26 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
INIT_LIST_HEAD(&f->free_chs); INIT_LIST_HEAD(&f->free_chs);
mutex_init(&f->free_chs_mutex); mutex_init(&f->free_chs_mutex);
if (g->ops.mm.is_bar1_supported(g))
err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm,
f->userd_entry_size * f->num_channels,
&f->userd);
else
err = gk20a_gmmu_alloc_sys(g, f->userd_entry_size *
f->num_channels, &f->userd);
if (err) {
dev_err(d, "userd memory allocation failed\n");
goto clean_up;
}
gk20a_dbg(gpu_dbg_map, "userd gpu va = 0x%llx", f->userd.gpu_va);
for (chid = 0; chid < f->num_channels; chid++) { for (chid = 0; chid < f->num_channels; chid++) {
f->channel[chid].userd_iova = f->channel[chid].userd_iova =
g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0)
+ chid * f->userd_entry_size; + chid * f->userd_entry_size;
f->channel[chid].userd_gpu_va = f->channel[chid].userd_gpu_va =
f->userd.gpu_va + chid * f->userd_entry_size; f->userd.gpu_va + chid * f->userd_entry_size;
gk20a_init_channel_support(g, chid); gk20a_init_channel_support(g, chid);
gk20a_init_tsg_support(g, chid); gk20a_init_tsg_support(g, chid);
} }
@@ -858,7 +864,10 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
clean_up: clean_up:
gk20a_dbg_fn("fail"); gk20a_dbg_fn("fail");
gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); if (g->ops.mm.is_bar1_supported(g))
gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
else
gk20a_gmmu_free(g, &f->userd);
vfree(f->channel); vfree(f->channel);
f->channel = NULL; f->channel = NULL;
@@ -884,7 +893,7 @@ static void gk20a_fifo_handle_runlist_event(struct gk20a *g)
gk20a_writel(g, fifo_intr_runlist_r(), runlist_event); gk20a_writel(g, fifo_intr_runlist_r(), runlist_event);
} }
static int gk20a_init_fifo_setup_hw(struct gk20a *g) int gk20a_init_fifo_setup_hw(struct gk20a *g)
{ {
struct fifo_gk20a *f = &g->fifo; struct fifo_gk20a *f = &g->fifo;
@@ -952,7 +961,8 @@ int gk20a_init_fifo_support(struct gk20a *g)
if (err) if (err)
return err; return err;
err = gk20a_init_fifo_setup_hw(g); if (g->ops.fifo.init_fifo_setup_hw)
err = g->ops.fifo.init_fifo_setup_hw(g);
if (err) if (err)
return err; return err;
@@ -3015,7 +3025,8 @@ int gk20a_fifo_suspend(struct gk20a *g)
gk20a_dbg_fn(""); gk20a_dbg_fn("");
/* stop bar1 snooping */ /* stop bar1 snooping */
gk20a_writel(g, fifo_bar1_base_r(), if (g->ops.mm.is_bar1_supported(g))
gk20a_writel(g, fifo_bar1_base_r(),
fifo_bar1_base_valid_false_f()); fifo_bar1_base_valid_false_f());
/* disable fifo intr */ /* disable fifo intr */
@@ -3246,6 +3257,7 @@ void gk20a_fifo_debugfs_init(struct device *dev)
void gk20a_init_fifo(struct gpu_ops *gops) void gk20a_init_fifo(struct gpu_ops *gops)
{ {
gk20a_init_channel(gops); gk20a_init_channel(gops);
gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw;
gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg; gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg;
gops->fifo.update_runlist = gk20a_fifo_update_runlist; gops->fifo.update_runlist = gk20a_fifo_update_runlist;

View File

@@ -167,6 +167,8 @@ static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
int gk20a_init_fifo_support(struct gk20a *g); int gk20a_init_fifo_support(struct gk20a *g);
int gk20a_init_fifo_setup_hw(struct gk20a *g);
void gk20a_fifo_isr(struct gk20a *g); void gk20a_fifo_isr(struct gk20a *g);
void gk20a_fifo_nonstall_isr(struct gk20a *g); void gk20a_fifo_nonstall_isr(struct gk20a *g);
@@ -263,4 +265,8 @@ int gk20a_fifo_init_engine_info(struct fifo_gk20a *f);
void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist); void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist);
void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist); void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist);
u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c);
void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c);
#endif /*__GR_GK20A_H__*/ #endif /*__GR_GK20A_H__*/

View File

@@ -342,6 +342,7 @@ struct gpu_ops {
void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod); void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod);
} clock_gating; } clock_gating;
struct { struct {
int (*init_fifo_setup_hw)(struct gk20a *g);
void (*bind_channel)(struct channel_gk20a *ch_gk20a); void (*bind_channel)(struct channel_gk20a *ch_gk20a);
void (*unbind_channel)(struct channel_gk20a *ch_gk20a); void (*unbind_channel)(struct channel_gk20a *ch_gk20a);
void (*disable_channel)(struct channel_gk20a *ch); void (*disable_channel)(struct channel_gk20a *ch);
@@ -386,6 +387,8 @@ struct gpu_ops {
u32 *runlist); u32 *runlist);
void (*get_ch_runlist_entry)(struct channel_gk20a *ch, void (*get_ch_runlist_entry)(struct channel_gk20a *ch,
u32 *runlist); u32 *runlist);
u32 (*userd_gp_get)(struct gk20a *g, struct channel_gk20a *ch);
void (*userd_gp_put)(struct gk20a *g, struct channel_gk20a *ch);
} fifo; } fifo;
struct pmu_v { struct pmu_v {
/*used for change of enum zbc update cmd id from ver 0 to ver1*/ /*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -552,6 +555,7 @@ struct gpu_ops {
u32 (*get_big_page_sizes)(void); u32 (*get_big_page_sizes)(void);
u32 (*get_physical_addr_bits)(struct gk20a *g); u32 (*get_physical_addr_bits)(struct gk20a *g);
int (*init_mm_setup_hw)(struct gk20a *g); int (*init_mm_setup_hw)(struct gk20a *g);
bool (*is_bar1_supported)(struct gk20a *g);
int (*init_bar2_vm)(struct gk20a *g); int (*init_bar2_vm)(struct gk20a *g);
int (*init_bar2_mm_hw_setup)(struct gk20a *g); int (*init_bar2_mm_hw_setup)(struct gk20a *g);
void (*remove_bar2_vm)(struct gk20a *g); void (*remove_bar2_vm)(struct gk20a *g);

View File

@@ -797,7 +797,10 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
if (g->ops.mm.remove_bar2_vm) if (g->ops.mm.remove_bar2_vm)
g->ops.mm.remove_bar2_vm(g); g->ops.mm.remove_bar2_vm(g);
gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
if (g->ops.mm.is_bar1_supported(g))
gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
gk20a_vm_remove_support_nofree(&mm->cde.vm); gk20a_vm_remove_support_nofree(&mm->cde.vm);
@@ -1001,10 +1004,11 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
if (err) if (err)
return err; return err;
err = gk20a_init_bar1_vm(mm); if (g->ops.mm.is_bar1_supported(g)) {
if (err) err = gk20a_init_bar1_vm(mm);
return err; if (err)
return err;
}
if (g->ops.mm.init_bar2_vm) { if (g->ops.mm.init_bar2_vm) {
err = g->ops.mm.init_bar2_vm(g); err = g->ops.mm.init_bar2_vm(g);
if (err) if (err)
@@ -1055,7 +1059,8 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0) g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
>> 8); >> 8);
g->ops.mm.bar1_bind(g, &mm->bar1.inst_block); if (g->ops.mm.bar1_bind)
g->ops.mm.bar1_bind(g, &mm->bar1.inst_block);
if (g->ops.mm.init_bar2_mm_hw_setup) { if (g->ops.mm.init_bar2_mm_hw_setup) {
err = g->ops.mm.init_bar2_mm_hw_setup(g); err = g->ops.mm.init_bar2_mm_hw_setup(g);
@@ -5249,6 +5254,11 @@ clean_up:
return err; return err;
} }
/* BAR1 is always available on gk20a; callers use this hook to gate
 * BAR1-only allocation/teardown paths (the g argument is unused here). */
static bool gk20a_mm_is_bar1_supported(struct gk20a *g)
{
return true;
}
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
void gk20a_mm_debugfs_init(struct device *dev) void gk20a_mm_debugfs_init(struct device *dev)
{ {
@@ -5284,4 +5294,5 @@ void gk20a_init_mm(struct gpu_ops *gops)
gops->mm.init_pdb = gk20a_mm_init_pdb; gops->mm.init_pdb = gk20a_mm_init_pdb;
gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw; gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw;
gops->mm.bar1_bind = gk20a_mm_bar1_bind; gops->mm.bar1_bind = gk20a_mm_bar1_bind;
gops->mm.is_bar1_supported = gk20a_mm_is_bar1_supported;
} }

View File

@@ -15,6 +15,7 @@
#include <linux/delay.h> #include <linux/delay.h>
#include "gk20a/gk20a.h" #include "gk20a/gk20a.h"
#include "gk20a/fifo_gk20a.h"
#include "fifo_gm20b.h" #include "fifo_gm20b.h"
#include "hw_ccsr_gm20b.h" #include "hw_ccsr_gm20b.h"
#include "hw_ram_gm20b.h" #include "hw_ram_gm20b.h"
@@ -133,6 +134,7 @@ static void gm20b_device_info_data_parse(struct gk20a *g,
} }
void gm20b_init_fifo(struct gpu_ops *gops) void gm20b_init_fifo(struct gpu_ops *gops)
{ {
gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw;
gops->fifo.bind_channel = channel_gm20b_bind; gops->fifo.bind_channel = channel_gm20b_bind;
gops->fifo.unbind_channel = channel_gk20a_unbind; gops->fifo.unbind_channel = channel_gk20a_unbind;
gops->fifo.disable_channel = channel_gk20a_disable; gops->fifo.disable_channel = channel_gk20a_disable;
@@ -142,6 +144,8 @@ void gm20b_init_fifo(struct gpu_ops *gops)
gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
gops->fifo.channel_set_priority = gk20a_channel_set_priority; gops->fifo.channel_set_priority = gk20a_channel_set_priority;
gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice; gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice;
gops->fifo.userd_gp_get = gk20a_userd_gp_get;
gops->fifo.userd_gp_put = gk20a_userd_gp_put;
gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg; gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg;

View File

@@ -163,6 +163,11 @@ static int gm20b_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst)
return retry ? -EINVAL : 0; return retry ? -EINVAL : 0;
} }
/* BAR1 is always available on gm20b; callers use this hook to gate
 * BAR1-only allocation/teardown paths (the g argument is unused here). */
static bool gm20b_mm_is_bar1_supported(struct gk20a *g)
{
return true;
}
void gm20b_init_mm(struct gpu_ops *gops) void gm20b_init_mm(struct gpu_ops *gops)
{ {
gops->mm.support_sparse = gm20b_mm_support_sparse; gops->mm.support_sparse = gm20b_mm_support_sparse;
@@ -186,4 +191,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
gops->mm.init_pdb = gk20a_mm_init_pdb; gops->mm.init_pdb = gk20a_mm_init_pdb;
gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw; gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw;
gops->mm.bar1_bind = gm20b_mm_bar1_bind; gops->mm.bar1_bind = gm20b_mm_bar1_bind;
gops->mm.is_bar1_supported = gm20b_mm_is_bar1_supported;
} }