gpu: nvgpu: add TSG support for engine context

All channels in a TSG need to share the same engine context,
i.e. the RAMFC of every channel in a TSG must point to the same
NV_RAMIN_GR_WFI_TARGET.

To achieve this, add a gr_ctx pointer to the TSG struct so that a
TSG can maintain its own unique gr_ctx.
Also, change the gr_ctx member of a channel to a pointer so that a
channel that is part of a TSG points to the TSG's gr_ctx, while a
standalone channel points to its own gr_ctx.

In gk20a_alloc_obj_ctx(), allocate gr_ctx as below (see the sketch
after this list):

1) If the channel is not part of any TSG
   - allocate its own gr_ctx buffer if one is not already allocated

2) If the channel is part of a TSG
   - check whether the TSG has already allocated a gr_ctx
   - if yes, point the channel's gr_ctx at the TSG's
   - if not, the channel is the first to be bound to this TSG, so
     allocate a new gr_ctx on the TSG first and then point the
     channel's gr_ctx at it
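
The decision reduces to the condensed sketch below. It reuses the
variable names from the gk20a_alloc_obj_ctx() hunk in gr_gk20a.c
further down; error handling and logging are trimmed, so treat it as
an outline rather than the exact code:

    struct tsg_gk20a *tsg = NULL;

    if (gk20a_is_channel_marked_as_tsg(c))
            tsg = &f->tsg[c->tsgid];

    if (!tsg) {
            /* standalone channel: allocate a private gr_ctx once */
            if (!ch_ctx->gr_ctx)
                    err = gr_gk20a_alloc_channel_gr_ctx(g, c);
    } else {
            /* first channel bound to this TSG allocates the shared gr_ctx */
            if (!tsg->tsg_gr_ctx) {
                    tsg->vm = c->vm;
                    err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg);
            }
            /* every channel in the TSG points at the TSG's gr_ctx */
            ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
    }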

Also, gr_ctx is released as below (see the sketch after this list):

1) If the channel is not part of a TSG, it is released when the
   channel is closed
2) Otherwise, it is released when the TSG itself is closed
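
The matching release paths, again condensed from the
gk20a_free_channel_ctx() and gk20a_tsg_dev_release() hunks below:

    /* channel close: only a standalone channel frees its gr_ctx */
    if (!gk20a_is_channel_marked_as_tsg(c))
            gr_gk20a_free_channel_gr_ctx(c);

    /* TSG close: the TSG frees the shared gr_ctx it owns */
    if (tsg->tsg_gr_ctx) {
            gr_gk20a_free_tsg_gr_ctx(tsg);
            tsg->tsg_gr_ctx = NULL;
    }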

Bug 1470692

Change-Id: Id347217d5b462e0e972cd3d79d17795b37034a50
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/417065
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Author:    Deepak Nibade
Date:      2014-06-18 18:32:03 +05:30
Committer: Dan Willemsen
Parent:    b6466fbe07
Commit:    ee66559a0b

5 changed files with 131 additions and 43 deletions

channel_gk20a.h

@@ -56,7 +56,7 @@ struct fence {
 /* contexts associated with a channel */
 struct channel_ctx_gk20a {
-        struct gr_ctx_desc gr_ctx;
+        struct gr_ctx_desc *gr_ctx;
         struct pm_ctx_desc pm_ctx;
         struct patch_desc patch_ctx;
         struct zcull_ctx_desc zcull_ctx;

gr_gk20a.c

@@ -801,8 +801,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
         gk20a_dbg_fn("");
 
-        ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
                         0, pgprot_dmacoherent(PAGE_KERNEL));
         if (!ctx_ptr)
                 return -ENOMEM;
@@ -1562,8 +1562,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
         if (!gold_ptr)
                 goto clean_up;
 
-        ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
                         0, pgprot_dmacoherent(PAGE_KERNEL));
         if (!ctx_ptr)
                 goto clean_up;
@@ -1602,7 +1602,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
                         gk20a_mem_rd32(gold_ptr, i);
         }
 
-        gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
+        gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
 
         gr->ctx_vars.golden_image_initialized = true;
@@ -1636,8 +1636,8 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
            Flush and invalidate before cpu update. */
         gk20a_mm_l2_flush(g, true);
 
-        ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
                         0, pgprot_dmacoherent(PAGE_KERNEL));
         if (!ctx_ptr)
                 return -ENOMEM;
@@ -1676,8 +1676,8 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
            Flush and invalidate before cpu update. */
         gk20a_mm_l2_flush(g, true);
 
-        ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
                         0, pgprot_dmacoherent(PAGE_KERNEL));
         if (!ctx_ptr)
                 return -ENOMEM;
@@ -2521,12 +2521,11 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
         c->ch_ctx.global_ctx_buffer_mapped = false;
 }
 
-static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
-                        struct channel_gk20a *c)
+static int __gr_gk20a_alloc_gr_ctx(struct gk20a *g,
+                        struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm)
 {
+        struct gr_ctx_desc *gr_ctx = NULL;
         struct gr_gk20a *gr = &g->gr;
-        struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx;
-        struct vm_gk20a *ch_vm = c->vm;
         struct device *d = dev_from_gk20a(g);
         struct sg_table *sgt;
         DEFINE_DMA_ATTRS(attrs);
@@ -2542,12 +2541,18 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
         gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
         gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
 
+        gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL);
+        if (!gr_ctx)
+                return -ENOMEM;
+
         gr_ctx->size = gr->ctx_vars.buffer_total_size;
         dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
         gr_ctx->pages = dma_alloc_attrs(d, gr_ctx->size,
                                 &iova, GFP_KERNEL, &attrs);
-        if (!gr_ctx->pages)
-                return -ENOMEM;
+        if (!gr_ctx->pages) {
+                err = -ENOMEM;
+                goto err_free_ctx;
+        }
 
         gr_ctx->iova = iova;
         err = gk20a_get_sgtable_from_pages(d, &sgt, gr_ctx->pages,
@@ -2555,7 +2560,7 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
         if (err)
                 goto err_free;
 
-        gr_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, gr_ctx->size,
+        gr_ctx->gpu_va = gk20a_gmmu_map(vm, &sgt, gr_ctx->size,
                                 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
                                 gk20a_mem_flag_none);
         if (!gr_ctx->gpu_va)
@@ -2563,6 +2568,8 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
 
         gk20a_free_sgtable(&sgt);
 
+        *__gr_ctx = gr_ctx;
+
         return 0;
 
 err_free_sgt:
@@ -2572,30 +2579,74 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
                 gr_ctx->pages, gr_ctx->iova, &attrs);
         gr_ctx->pages = NULL;
         gr_ctx->iova = 0;
+err_free_ctx:
+        kfree(gr_ctx);
+        gr_ctx = NULL;
+
         return err;
 }
 
-static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
+static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
+                        struct tsg_gk20a *tsg)
+{
+        struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx;
+        int err;
+
+        if (!tsg->vm) {
+                gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n");
+                return -ENOMEM;
+        }
+
+        err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, tsg->vm);
+        if (err)
+                return err;
+
+        return 0;
+}
+
+static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
+                        struct channel_gk20a *c)
+{
+        struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx;
+        int err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, c->vm);
+        if (err)
+                return err;
+
+        return 0;
+}
+
+static void __gr_gk20a_free_gr_ctx(struct gk20a *g,
+                        struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx)
 {
-        struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-        struct vm_gk20a *ch_vm = c->vm;
-        struct gk20a *g = c->g;
         struct device *d = dev_from_gk20a(g);
         DEFINE_DMA_ATTRS(attrs);
 
         gk20a_dbg_fn("");
 
-        if (!ch_ctx->gr_ctx.gpu_va)
+        if (!gr_ctx || !gr_ctx->gpu_va)
                 return;
 
-        gk20a_gmmu_unmap(ch_vm, ch_ctx->gr_ctx.gpu_va,
-                        ch_ctx->gr_ctx.size, gk20a_mem_flag_none);
+        gk20a_gmmu_unmap(vm, gr_ctx->gpu_va,
+                        gr_ctx->size, gk20a_mem_flag_none);
         dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
-        dma_free_attrs(d, ch_ctx->gr_ctx.size,
-                ch_ctx->gr_ctx.pages, ch_ctx->gr_ctx.iova, &attrs);
-        ch_ctx->gr_ctx.pages = NULL;
-        ch_ctx->gr_ctx.iova = 0;
+        dma_free_attrs(d, gr_ctx->size,
+                gr_ctx->pages, gr_ctx->iova, &attrs);
+        gr_ctx->pages = NULL;
+        gr_ctx->iova = 0;
+}
+
+void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
+{
+        if (!tsg->vm) {
+                gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n");
+                return;
+        }
+        __gr_gk20a_free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx);
+}
+
+static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
+{
+        __gr_gk20a_free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
 }
 
 static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
@@ -2684,7 +2735,8 @@ void gk20a_free_channel_ctx(struct channel_gk20a *c)
 {
         gr_gk20a_unmap_global_ctx_buffers(c);
         gr_gk20a_free_channel_patch_ctx(c);
-        gr_gk20a_free_channel_gr_ctx(c);
+        if (!gk20a_is_channel_marked_as_tsg(c))
+                gr_gk20a_free_channel_gr_ctx(c);
 
         /* zcull_ctx, pm_ctx */
@@ -2717,7 +2769,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
                         struct nvhost_alloc_obj_ctx_args *args)
 {
         struct gk20a *g = c->g;
+        struct fifo_gk20a *f = &g->fifo;
         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+        struct tsg_gk20a *tsg = NULL;
         int err = 0;
 
         gk20a_dbg_fn("");
@@ -2736,27 +2790,44 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
                 err = -EINVAL;
                 goto out;
         }
+        c->obj_class = args->class_num;
+
+        if (gk20a_is_channel_marked_as_tsg(c))
+                tsg = &f->tsg[c->tsgid];
 
         /* allocate gr ctx buffer */
-        if (ch_ctx->gr_ctx.pages == NULL) {
-                err = gr_gk20a_alloc_channel_gr_ctx(g, c);
-                if (err) {
-                        gk20a_err(dev_from_gk20a(g),
-                                "fail to allocate gr ctx buffer");
-                        goto out;
-                }
-                c->obj_class = args->class_num;
+        if (!tsg) {
+                if (!ch_ctx->gr_ctx) {
+                        err = gr_gk20a_alloc_channel_gr_ctx(g, c);
+                        if (err) {
+                                gk20a_err(dev_from_gk20a(g),
+                                        "fail to allocate gr ctx buffer");
+                                goto out;
+                        }
+                } else {
+                        /*TBD: needs to be more subtle about which is
+                         * being allocated as some are allowed to be
+                         * allocated along same channel */
+                        gk20a_err(dev_from_gk20a(g),
+                                "too many classes alloc'd on same channel");
+                        err = -EINVAL;
+                        goto out;
+                }
         } else {
-                /*TBD: needs to be more subtle about which is being allocated
-                 * as some are allowed to be allocated along same channel */
-                gk20a_err(dev_from_gk20a(g),
-                        "too many classes alloc'd on same channel");
-                err = -EINVAL;
-                goto out;
+                if (!tsg->tsg_gr_ctx) {
+                        tsg->vm = c->vm;
+                        err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg);
+                        if (err) {
+                                gk20a_err(dev_from_gk20a(g),
+                                        "fail to allocate TSG gr ctx buffer");
+                                goto out;
+                        }
+                }
+                ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
         }
 
         /* commit gr ctx buffer */
-        err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
+        err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
         if (err) {
                 gk20a_err(dev_from_gk20a(g),
                         "fail to commit gr ctx buffer");
@@ -6657,8 +6728,8 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
         /* would have been a variant of gr_gk20a_apply_instmem_overrides */
         /* recoded in-place instead.*/
 
-        ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
                         0, pgprot_dmacoherent(PAGE_KERNEL));
         if (!ctx_ptr) {
                 err = -ENOMEM;

gr_gk20a.h

@@ -20,6 +20,7 @@
 #include <linux/slab.h>
 
+#include "tsg_gk20a.h"
 #include "gr_ctx_gk20a.h"
 
 #define GR_IDLE_CHECK_DEFAULT        100 /* usec */
@@ -414,4 +415,6 @@ void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
                                   u32 **sm_dsm_perf_regs,
                                   u32 *perf_register_stride);
 int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
+void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c);
+
 #endif /*__GR_GK20A_H__*/

tsg_gk20a.c

@@ -165,6 +165,9 @@ int gk20a_tsg_dev_open(struct inode *inode, struct file *filp)
         tsg->g = g;
         tsg->num_runnable_channels = 0;
 
+        tsg->tsg_gr_ctx = NULL;
+        tsg->vm = NULL;
+
         filp->private_data = tsg;
 
         gk20a_dbg(gpu_dbg_fn, "tsg opened %d\n", tsg->tsgid);
@@ -185,6 +188,13 @@ int gk20a_tsg_dev_release(struct inode *inode, struct file *filp)
                 return -EBUSY;
         }
 
+        if (tsg->tsg_gr_ctx) {
+                gr_gk20a_free_tsg_gr_ctx(tsg);
+                tsg->tsg_gr_ctx = NULL;
+        }
+        if (tsg->vm)
+                tsg->vm = NULL;
+
         release_used_tsg(&g->fifo, tsg);
 
         gk20a_dbg(gpu_dbg_fn, "tsg released %d\n", tsg->tsgid);

tsg_gk20a.h

@@ -39,6 +39,10 @@ struct tsg_gk20a {
         struct list_head ch_runnable_list;
         int num_runnable_channels;
         struct mutex ch_list_lock;
+
+        struct gr_ctx_desc *tsg_gr_ctx;
+
+        struct vm_gk20a *vm;
 };
 
 #endif /* __TSG_GK20A_H_ */