Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-22 17:36:20 +03:00)
gpu: nvgpu: userd allocation from sysmem
When BAR1 memory is not supported, userd is allocated from sysmem
instead. The gp_get and gp_put functions are updated accordingly.

JIRA GV11B-1

Change-Id: Ia895712a110f6cca26474228141488f5f8ace756
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1225384
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: bb5fd16c67
Commit: fda4ddfa79
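In outline, the change gates USERD placement and every BAR1 touch behind a new mm.is_bar1_supported HAL op, and moves GP_GET/GP_PUT accesses behind new fifo.userd_gp_get/userd_gp_put ops so a BAR1-less chip can supply sysmem-backed versions. A condensed sketch of the resulting allocation decision, using only names that appear in the diff below (the wrapper function name is mine; error logging trimmed; illustrative, not the verbatim source):

        /* Sketch of the USERD backing-store selection introduced below.
         * 'userd_alloc_sketch' is a hypothetical wrapper; the callees and
         * fields are the ones the diff itself uses. */
        static int userd_alloc_sketch(struct gk20a *g, struct fifo_gk20a *f)
        {
                if (g->ops.mm.is_bar1_supported(g))
                        /* BAR1 present: allocate sysmem and map it into the
                         * BAR1 VM, as before this change */
                        return gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm,
                                        f->userd_entry_size * f->num_channels,
                                        &f->userd);
                /* no BAR1: plain sysmem allocation, no BAR1 mapping */
                return gk20a_gmmu_alloc_sys(g,
                                f->userd_entry_size * f->num_channels,
                                &f->userd);
        }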
drivers/gpu/nvgpu/gk20a/channel_gk20a.c:

@@ -1541,12 +1541,25 @@ clean_up:
         return err;
 }
 
+u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
+{
+        return gk20a_bar1_readl(g,
+                c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
+}
+
+void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
+{
+        gk20a_bar1_writel(g,
+                c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
+                c->gpfifo.put);
+}
+
 /* Update with this periodically to determine how the gpfifo is draining. */
 static inline u32 update_gp_get(struct gk20a *g,
                 struct channel_gk20a *c)
 {
-        u32 new_get = gk20a_bar1_readl(g,
-                c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
+        u32 new_get = g->ops.fifo.userd_gp_get(g, c);
 
         if (new_get < c->gpfifo.get)
                 c->gpfifo.wrap = !c->gpfifo.wrap;
         c->gpfifo.get = new_get;
@@ -2360,9 +2373,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                 wait_cmd, incr_cmd,
                                 skip_buffer_refcounting);
 
-        gk20a_bar1_writel(g,
-                c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
-                c->gpfifo.put);
+        g->ops.fifo.userd_gp_put(g, c);
 
         trace_gk20a_channel_submitted_gpfifo(dev_name(c->g->dev),
                                 c->hw_chid,
@@ -2988,6 +2999,8 @@ void gk20a_init_channel(struct gpu_ops *gops)
         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
         gops->fifo.channel_set_priority = gk20a_channel_set_priority;
         gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice;
+        gops->fifo.userd_gp_get = gk20a_userd_gp_get;
+        gops->fifo.userd_gp_put = gk20a_userd_gp_put;
 }
 
 long gk20a_channel_ioctl(struct file *filp,
drivers/gpu/nvgpu/gk20a/fifo_gk20a.c:

@@ -491,7 +491,10 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 
         vfree(f->channel);
         vfree(f->tsg);
-        gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
+        if (g->ops.mm.is_bar1_supported(g))
+                gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
+        else
+                gk20a_gmmu_free(g, &f->userd);
 
         gk20a_fifo_delete_runlist(f);
@@ -797,16 +800,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 
         f->userd_entry_size = 1 << ram_userd_base_shift_v();
 
-        err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm,
-                        f->userd_entry_size * f->num_channels,
-                        &f->userd);
-        if (err) {
-                dev_err(d, "memory allocation failed\n");
-                goto clean_up;
-        }
-
-        gk20a_dbg(gpu_dbg_map_v, "userd bar1 va = 0x%llx", f->userd.gpu_va);
-
         f->channel = vzalloc(f->num_channels * sizeof(*f->channel));
         f->tsg = vzalloc(f->num_channels * sizeof(*f->tsg));
         f->pbdma_map = kzalloc(f->num_pbdma * sizeof(*f->pbdma_map),
@@ -834,13 +827,26 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
         INIT_LIST_HEAD(&f->free_chs);
         mutex_init(&f->free_chs_mutex);
 
+        if (g->ops.mm.is_bar1_supported(g))
+                err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm,
+                                f->userd_entry_size * f->num_channels,
+                                &f->userd);
+
+        else
+                err = gk20a_gmmu_alloc_sys(g, f->userd_entry_size *
+                                f->num_channels, &f->userd);
+        if (err) {
+                dev_err(d, "userd memory allocation failed\n");
+                goto clean_up;
+        }
+        gk20a_dbg(gpu_dbg_map, "userd gpu va = 0x%llx", f->userd.gpu_va);
+
         for (chid = 0; chid < f->num_channels; chid++) {
                 f->channel[chid].userd_iova =
                         g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0)
-                                + chid * f->userd_entry_size;
+                        + chid * f->userd_entry_size;
                 f->channel[chid].userd_gpu_va =
                         f->userd.gpu_va + chid * f->userd_entry_size;
 
                 gk20a_init_channel_support(g, chid);
                 gk20a_init_tsg_support(g, chid);
         }
@@ -858,7 +864,10 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 
 clean_up:
         gk20a_dbg_fn("fail");
-        gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
+        if (g->ops.mm.is_bar1_supported(g))
+                gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
+        else
+                gk20a_gmmu_free(g, &f->userd);
 
         vfree(f->channel);
         f->channel = NULL;
@@ -884,7 +893,7 @@ static void gk20a_fifo_handle_runlist_event(struct gk20a *g)
         gk20a_writel(g, fifo_intr_runlist_r(), runlist_event);
 }
 
-static int gk20a_init_fifo_setup_hw(struct gk20a *g)
+int gk20a_init_fifo_setup_hw(struct gk20a *g)
 {
         struct fifo_gk20a *f = &g->fifo;
 
@@ -952,7 +961,8 @@ int gk20a_init_fifo_support(struct gk20a *g)
         if (err)
                 return err;
 
-        err = gk20a_init_fifo_setup_hw(g);
+        if (g->ops.fifo.init_fifo_setup_hw)
+                err = g->ops.fifo.init_fifo_setup_hw(g);
         if (err)
                 return err;
 
@@ -3015,7 +3025,8 @@ int gk20a_fifo_suspend(struct gk20a *g)
         gk20a_dbg_fn("");
 
         /* stop bar1 snooping */
-        gk20a_writel(g, fifo_bar1_base_r(),
+        if (g->ops.mm.is_bar1_supported(g))
+                gk20a_writel(g, fifo_bar1_base_r(),
                         fifo_bar1_base_valid_false_f());
 
         /* disable fifo intr */
@@ -3246,6 +3257,7 @@ void gk20a_fifo_debugfs_init(struct device *dev)
 void gk20a_init_fifo(struct gpu_ops *gops)
 {
         gk20a_init_channel(gops);
+        gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw;
         gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
         gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg;
         gops->fifo.update_runlist = gk20a_fifo_update_runlist;
drivers/gpu/nvgpu/gk20a/fifo_gk20a.h:

@@ -167,6 +167,8 @@ static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
 
 int gk20a_init_fifo_support(struct gk20a *g);
 
+int gk20a_init_fifo_setup_hw(struct gk20a *g);
+
 void gk20a_fifo_isr(struct gk20a *g);
 void gk20a_fifo_nonstall_isr(struct gk20a *g);
 
@@ -263,4 +265,8 @@ int gk20a_fifo_init_engine_info(struct fifo_gk20a *f);
 void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist);
 void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist);
 
+u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c);
+void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c);
+
+
 #endif /*__GR_GK20A_H__*/
drivers/gpu/nvgpu/gk20a/gk20a.h:

@@ -342,6 +342,7 @@ struct gpu_ops {
                 void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod);
         } clock_gating;
         struct {
+                int (*init_fifo_setup_hw)(struct gk20a *g);
                 void (*bind_channel)(struct channel_gk20a *ch_gk20a);
                 void (*unbind_channel)(struct channel_gk20a *ch_gk20a);
                 void (*disable_channel)(struct channel_gk20a *ch);
@@ -386,6 +387,8 @@ struct gpu_ops {
                                 u32 *runlist);
                 void (*get_ch_runlist_entry)(struct channel_gk20a *ch,
                                 u32 *runlist);
+                u32 (*userd_gp_get)(struct gk20a *g, struct channel_gk20a *ch);
+                void (*userd_gp_put)(struct gk20a *g, struct channel_gk20a *ch);
         } fifo;
         struct pmu_v {
                 /*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -552,6 +555,7 @@ struct gpu_ops {
                 u32 (*get_big_page_sizes)(void);
                 u32 (*get_physical_addr_bits)(struct gk20a *g);
                 int (*init_mm_setup_hw)(struct gk20a *g);
+                bool (*is_bar1_supported)(struct gk20a *g);
                 int (*init_bar2_vm)(struct gk20a *g);
                 int (*init_bar2_mm_hw_setup)(struct gk20a *g);
                 void (*remove_bar2_vm)(struct gk20a *g);
drivers/gpu/nvgpu/gk20a/mm_gk20a.c:

@@ -797,7 +797,10 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 
         if (g->ops.mm.remove_bar2_vm)
                 g->ops.mm.remove_bar2_vm(g);
-        gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
+
+        if (g->ops.mm.is_bar1_supported(g))
+                gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
+
         gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
         gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
         gk20a_vm_remove_support_nofree(&mm->cde.vm);
@@ -1001,10 +1004,11 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
         if (err)
                 return err;
 
-        err = gk20a_init_bar1_vm(mm);
-        if (err)
-                return err;
-
+        if (g->ops.mm.is_bar1_supported(g)) {
+                err = gk20a_init_bar1_vm(mm);
+                if (err)
+                        return err;
+        }
         if (g->ops.mm.init_bar2_vm) {
                 err = g->ops.mm.init_bar2_vm(g);
                 if (err)
@@ -1055,7 +1059,8 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
                 g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
                 >> 8);
 
-        g->ops.mm.bar1_bind(g, &mm->bar1.inst_block);
+        if (g->ops.mm.bar1_bind)
+                g->ops.mm.bar1_bind(g, &mm->bar1.inst_block);
 
         if (g->ops.mm.init_bar2_mm_hw_setup) {
                 err = g->ops.mm.init_bar2_mm_hw_setup(g);
@@ -5249,6 +5254,11 @@ clean_up:
         return err;
 }
 
+static bool gk20a_mm_is_bar1_supported(struct gk20a *g)
+{
+        return true;
+}
+
 #ifdef CONFIG_DEBUG_FS
 void gk20a_mm_debugfs_init(struct device *dev)
 {
@@ -5284,4 +5294,5 @@ void gk20a_init_mm(struct gpu_ops *gops)
         gops->mm.init_pdb = gk20a_mm_init_pdb;
         gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw;
         gops->mm.bar1_bind = gk20a_mm_bar1_bind;
+        gops->mm.is_bar1_supported = gk20a_mm_is_bar1_supported;
 }
drivers/gpu/nvgpu/gm20b/fifo_gm20b.c:

@@ -15,6 +15,7 @@
 
 #include <linux/delay.h>
 #include "gk20a/gk20a.h"
+#include "gk20a/fifo_gk20a.h"
 #include "fifo_gm20b.h"
 #include "hw_ccsr_gm20b.h"
 #include "hw_ram_gm20b.h"
@@ -133,6 +134,7 @@ static void gm20b_device_info_data_parse(struct gk20a *g,
 }
 void gm20b_init_fifo(struct gpu_ops *gops)
 {
+        gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw;
         gops->fifo.bind_channel = channel_gm20b_bind;
         gops->fifo.unbind_channel = channel_gk20a_unbind;
         gops->fifo.disable_channel = channel_gk20a_disable;
@@ -142,6 +144,8 @@ void gm20b_init_fifo(struct gpu_ops *gops)
         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
         gops->fifo.channel_set_priority = gk20a_channel_set_priority;
         gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice;
+        gops->fifo.userd_gp_get = gk20a_userd_gp_get;
+        gops->fifo.userd_gp_put = gk20a_userd_gp_put;
 
         gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
         gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg;
drivers/gpu/nvgpu/gm20b/mm_gm20b.c:

@@ -163,6 +163,11 @@ static int gm20b_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst)
         return retry ? -EINVAL : 0;
 }
 
+static bool gm20b_mm_is_bar1_supported(struct gk20a *g)
+{
+        return true;
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
         gops->mm.support_sparse = gm20b_mm_support_sparse;
@@ -186,4 +191,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
         gops->mm.init_pdb = gk20a_mm_init_pdb;
         gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw;
         gops->mm.bar1_bind = gm20b_mm_bar1_bind;
+        gops->mm.is_bar1_supported = gm20b_mm_is_bar1_supported;
 }
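Both gk20a and gm20b keep the BAR1 path above: their is_bar1_supported hooks return true, and the default userd_gp_get/userd_gp_put still go through gk20a_bar1_readl/gk20a_bar1_writel. A chip without BAR1 (the JIRA tag suggests gv11b) would report false and plug in sysmem-backed accessors instead; those implementations are not part of this commit, so the following is only a hypothetical sketch (the accessor name and the userd_cpu_va field, a kernel CPU mapping of the channel's USERD page, are assumptions):

        /* Hypothetical sysmem-backed GP_GET accessor for a BAR1-less chip.
         * Not in this commit: 'sysmem_userd_gp_get' and 'c->userd_cpu_va'
         * are assumed names. ram_userd_gp_get_w() is the same word offset
         * the BAR1 variant uses above; a real implementation would also
         * need an ordered/volatile access rather than a plain load. */
        static u32 sysmem_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
        {
                u32 *userd = (u32 *)c->userd_cpu_va;

                /* plain CPU read of the GP_GET word; no BAR1 involved */
                return userd[ram_userd_gp_get_w()];
        }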