diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 9bacb5c98..6015ab5ea 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -47,7 +47,6 @@ static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c); static void free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e); -static void recycle_priv_cmdbuf(struct channel_gk20a *c); static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); @@ -1179,9 +1178,6 @@ static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c) q->size = q->mem.size / sizeof (u32); - INIT_LIST_HEAD(&q->head); - INIT_LIST_HEAD(&q->free); - return 0; clean_up: @@ -1193,28 +1189,12 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c) { struct vm_gk20a *ch_vm = c->vm; struct priv_cmd_queue *q = &c->priv_cmd_q; - struct priv_cmd_entry *e; - struct list_head *pos, *tmp, *head; if (q->size == 0) return; gk20a_gmmu_unmap_free(ch_vm, &q->mem); - /* free used list */ - head = &q->head; - list_for_each_safe(pos, tmp, head) { - e = container_of(pos, struct priv_cmd_entry, list); - free_priv_cmdbuf(c, e); - } - - /* free free list */ - head = &q->free; - list_for_each_safe(pos, tmp, head) { - e = container_of(pos, struct priv_cmd_entry, list); - kfree(e); - } - memset(q, 0, sizeof(struct priv_cmd_queue)); } @@ -1226,7 +1206,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, struct priv_cmd_entry *e; u32 free_count; u32 size = orig_size; - bool no_retry = false; gk20a_dbg_fn("size %d", orig_size); @@ -1240,17 +1219,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d", c->hw_chid, q->get, q->put); -TRY_AGAIN: free_count = (q->size - (q->put - q->get) - 1) % q->size; - if (size > free_count) { - if (!no_retry) { - recycle_priv_cmdbuf(c); - no_retry = true; - goto TRY_AGAIN; - } else - return -EAGAIN; - } + if (size > free_count) + return -EAGAIN; e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); if (!e) { @@ -1280,9 +1252,6 @@ TRY_AGAIN: /* we already handled q->put + size > q->size so BUG_ON this */ BUG_ON(q->put > q->size); - /* add new entry to head since we free from head */ - list_add(&e->list, &q->head); - *entry = e; gk20a_dbg_fn("done"); @@ -1295,65 +1264,9 @@ TRY_AGAIN: static void free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) { - if (!e) - return; - - list_del(&e->list); - kfree(e); } -/* free entries if they're no longer being used */ -static void recycle_priv_cmdbuf(struct channel_gk20a *c) -{ - struct priv_cmd_queue *q = &c->priv_cmd_q; - struct priv_cmd_entry *e, *tmp; - struct list_head *head = &q->head; - bool wrap_around, found = false; - - gk20a_dbg_fn(""); - - /* Find the most recent free entry. Free it and everything before it */ - list_for_each_entry(e, head, list) { - - gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d " - "curr get:put:wrap %d:%d:%d", - c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap, - c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap); - - wrap_around = (c->gpfifo.wrap != e->gp_wrap); - if (e->gp_get < e->gp_put) { - if (c->gpfifo.get >= e->gp_put || - wrap_around) { - found = true; - break; - } else - e->gp_get = c->gpfifo.get; - } else if (e->gp_get > e->gp_put) { - if (wrap_around && - c->gpfifo.get >= e->gp_put) { - found = true; - break; - } else - e->gp_get = c->gpfifo.get; - } - } - - if (found) - q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size; - else { - gk20a_dbg_info("no free entry recycled"); - return; - } - - list_for_each_entry_safe_continue(e, tmp, head, list) { - free_priv_cmdbuf(c, e); - } - - gk20a_dbg_fn("done"); -} - - int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, struct nvgpu_alloc_gpfifo_args *args) { @@ -1724,9 +1637,31 @@ fail_unlock: gk20a_channel_put(ch); } +static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, + struct priv_cmd_entry *e) +{ + struct priv_cmd_queue *q = &c->priv_cmd_q; + u32 cmd_entry_start; + struct device *d = dev_from_gk20a(c->g); + + if (!e) + return 0; + + cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va); + if ((q->get != cmd_entry_start) && cmd_entry_start != 0) + gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid); + + q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size; + free_priv_cmdbuf(c, e); + + return 0; +} + static int gk20a_channel_add_job(struct channel_gk20a *c, struct gk20a_fence *pre_fence, struct gk20a_fence *post_fence, + struct priv_cmd_entry *wait_cmd, + struct priv_cmd_entry *incr_cmd, bool skip_buffer_refcounting) { struct vm_gk20a *vm = c->vm; @@ -1761,6 +1696,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, job->mapped_buffers = mapped_buffers; job->pre_fence = gk20a_fence_get(pre_fence); job->post_fence = gk20a_fence_get(post_fence); + job->wait_cmd = wait_cmd; + job->incr_cmd = incr_cmd; gk20a_channel_timeout_start(c, job); @@ -1808,6 +1745,11 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) gk20a_fence_put(job->pre_fence); gk20a_fence_put(job->post_fence); + /* Free the private command buffers (wait_cmd first and + * then incr_cmd i.e. order of allocation) */ + gk20a_free_priv_cmdbuf(c, job->wait_cmd); + gk20a_free_priv_cmdbuf(c, job->incr_cmd); + /* job is done. release its vm reference (taken in add_job) */ gk20a_vm_put(vm); /* another bookkeeping taken in add_job. caller must hold a ref @@ -2114,6 +2056,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, /* TODO! Check for errors... */ gk20a_channel_add_job(c, pre_fence, post_fence, + wait_cmd, incr_cmd, skip_buffer_refcounting); c->cmds_pending = true; diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index d5f5e6a2f..245db56a0 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -62,6 +62,8 @@ struct channel_gk20a_job { int num_mapped_buffers; struct gk20a_fence *pre_fence; struct gk20a_fence *post_fence; + struct priv_cmd_entry *wait_cmd; + struct priv_cmd_entry *incr_cmd; struct list_head list; }; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 2dd4ccf56..ac55e9882 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -160,8 +160,6 @@ struct priv_cmd_queue { u32 size; /* num of entries in words */ u32 put; /* put for priv cmd queue */ u32 get; /* get for priv cmd queue */ - struct list_head free; /* list of pre-allocated free entries */ - struct list_head head; /* list of used entries */ }; struct priv_cmd_entry {