gpu: nvgpu: add support for pre-allocated resources

Add support for pre-allocation of job tracking resources
with a new (extended) ioctl. The goal is to avoid dynamic memory
allocation in the submit path. This patch does the following:

1) Introduces a new ioctl, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX,
which enables pre-allocation of tracking resources per job:
a) 2x priv_cmd_entry
b) 2x gk20a_fence

2) Implements a circular ring buffer for job
tracking to avoid lock contention between the producer
(submitter) and the consumer (clean-up)

Bug 1795076

Change-Id: I6b52e5c575871107ff380f9a5790f440a6969347
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1203300
(cherry picked from commit 9fd270c22b860935dffe244753dabd87454bef39)
Reviewed-on: http://git-master/r/1223934
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Authored by Sachit Kadle on 2016-08-15 14:32:39 -07:00
Committed by mobile promotions
parent 63e8592e06
commit 733fb79b39
7 changed files with 463 additions and 66 deletions
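
For reference, a minimal userspace sketch of the new ioctl follows. It is
illustrative only and not part of this change: the channel file descriptor
and the chosen sizes (1024 entries, 32 in-flight jobs) are assumptions,
while the struct fields and the ioctl number come from the uapi additions
in the diff below.

/*
 * Illustrative userspace sketch (not part of this change): allocate a
 * channel gpfifo with pre-allocated job-tracking resources. The channel
 * fd and the chosen sizes are assumptions for the example.
 */
#include <string.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int alloc_gpfifo_with_prealloc(int channel_fd)
{
	struct nvgpu_alloc_gpfifo_ex_args args;

	memset(&args, 0, sizeof(args));
	args.num_entries = 1024;      /* gpfifo entries */
	args.num_inflight_jobs = 32;  /* pre-allocate tracking for 32 jobs */
	args.flags = 0;               /* e.g. NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED */

	if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX, &args) < 0) {
		perror("NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX");
		return -1;
	}
	return 0;
}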


@@ -1126,9 +1126,9 @@ __releases(&cde_app->mutex)
struct gk20a_cde_app *cde_app = &g->cde_app;
bool channel_idle;
spin_lock(&ch->jobs_lock);
channel_idle = list_empty(&ch->jobs);
spin_unlock(&ch->jobs_lock);
channel_gk20a_joblist_lock(ch);
channel_idle = channel_gk20a_joblist_is_empty(ch);
channel_gk20a_joblist_unlock(ch);
if (!channel_idle)
return;
@@ -1207,7 +1207,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
/* allocate gpfifo (1024 should be more than enough) */
err = gk20a_alloc_channel_gpfifo(ch,
&(struct nvgpu_alloc_gpfifo_args){1024, 0});
&(struct nvgpu_alloc_gpfifo_ex_args){1024, 0, 0, {}});
if (err) {
gk20a_warn(cde_ctx->dev, "cde: unable to allocate gpfifo");
goto err_alloc_gpfifo;


@@ -126,9 +126,9 @@ static void gk20a_ce_finished_ctx_cb(struct channel_gk20a *ch, void *data)
bool channel_idle;
u32 event;
spin_lock(&ch->jobs_lock);
channel_idle = list_empty(&ch->jobs);
spin_unlock(&ch->jobs_lock);
channel_gk20a_joblist_lock(ch);
channel_idle = channel_gk20a_joblist_is_empty(ch);
channel_gk20a_joblist_unlock(ch);
if (!channel_idle)
return;
@@ -462,7 +462,7 @@ u32 gk20a_ce_create_context_with_cb(struct device *dev,
/* allocate gpfifo (1024 should be more than enough) */
err = gk20a_alloc_channel_gpfifo(ce_ctx->ch,
&(struct nvgpu_alloc_gpfifo_args){1024, 0});
&(struct nvgpu_alloc_gpfifo_ex_args){1024, 0, 0, {}});
if (err) {
gk20a_err(ce_ctx->dev, "ce: unable to allocate gpfifo");
goto end;


@@ -26,6 +26,7 @@
#include <linux/anon_inodes.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>
#include <linux/circ_buf.h>
#include "debug_gk20a.h"
#include "ctxsw_trace_gk20a.h"
@@ -55,6 +56,15 @@ static void free_priv_cmdbuf(struct channel_gk20a *c,
static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c);
static void channel_gk20a_joblist_add(struct channel_gk20a *c,
struct channel_gk20a_job *job);
static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
struct channel_gk20a_job *job);
static struct channel_gk20a_job *channel_gk20a_joblist_peek(
struct channel_gk20a *c);
static int channel_gk20a_commit_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_userd(struct channel_gk20a *c);
@@ -460,6 +470,7 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
{
struct channel_gk20a_job *job, *n;
bool released_job_semaphore = false;
bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
gk20a_channel_cancel_job_clean_up(ch, true);
@@ -471,14 +482,37 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
/* release all job semaphores (applies only to jobs that use
semaphore synchronization) */
spin_lock(&ch->jobs_lock);
list_for_each_entry_safe(job, n, &ch->jobs, list) {
channel_gk20a_joblist_lock(ch);
if (pre_alloc_enabled) {
int tmp_get = ch->joblist.pre_alloc.get;
int put = ch->joblist.pre_alloc.put;
/*
* ensure put is read before any subsequent reads.
* see corresponding wmb in gk20a_channel_add_job()
*/
rmb();
while (tmp_get != put) {
job = &ch->joblist.pre_alloc.jobs[tmp_get];
if (job->post_fence->semaphore) {
gk20a_semaphore_release(job->post_fence->semaphore);
gk20a_semaphore_release(
job->post_fence->semaphore);
released_job_semaphore = true;
}
tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
}
} else {
list_for_each_entry_safe(job, n,
&ch->joblist.dynamic.jobs, list) {
if (job->post_fence->semaphore) {
gk20a_semaphore_release(
job->post_fence->semaphore);
released_job_semaphore = true;
}
}
spin_unlock(&ch->jobs_lock);
}
channel_gk20a_joblist_unlock(ch);
if (released_job_semaphore)
wake_up_interruptible_all(&ch->semaphore_wq);
@@ -511,9 +545,9 @@ int gk20a_wait_channel_idle(struct channel_gk20a *ch)
msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
do {
spin_lock(&ch->jobs_lock);
channel_idle = list_empty(&ch->jobs);
spin_unlock(&ch->jobs_lock);
channel_gk20a_joblist_lock(ch);
channel_idle = channel_gk20a_joblist_is_empty(ch);
channel_gk20a_joblist_unlock(ch);
if (channel_idle)
break;
@@ -1016,6 +1050,10 @@ unbind:
mutex_unlock(&g->dbg_sessions_lock);
/* free pre-allocated resources, if applicable */
if (channel_gk20a_is_prealloc_enabled(ch))
channel_gk20a_free_prealloc_resources(ch);
/* make sure we catch accesses of unopened channels in case
* there's non-refcounted channel pointers hanging around */
ch->g = NULL;
@@ -1422,7 +1460,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
/* we already handled q->put + size > q->size so BUG_ON this */
BUG_ON(q->put > q->size);
/* commit the previous writes before making the entry valid */
/*
* commit the previous writes before making the entry valid.
* see the corresponding rmb() in gk20a_free_priv_cmdbuf().
*/
wmb();
e->valid = true;
@@ -1436,26 +1477,222 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
static void free_priv_cmdbuf(struct channel_gk20a *c,
struct priv_cmd_entry *e)
{
if (channel_gk20a_is_prealloc_enabled(c))
memset(e, 0, sizeof(struct priv_cmd_entry));
else
kfree(e);
}
static struct channel_gk20a_job *channel_gk20a_alloc_job(
struct channel_gk20a *c)
static int channel_gk20a_alloc_job(struct channel_gk20a *c,
struct channel_gk20a_job **job_out)
{
struct channel_gk20a_job *job = NULL;
int err = 0;
job = kzalloc(sizeof(*job), GFP_KERNEL);
return job;
if (channel_gk20a_is_prealloc_enabled(c)) {
int put = c->joblist.pre_alloc.put;
int get = c->joblist.pre_alloc.get;
/*
* ensure all subsequent reads happen after reading get.
* see corresponding wmb in gk20a_channel_clean_up_jobs()
*/
rmb();
if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length))
*job_out = &c->joblist.pre_alloc.jobs[put];
else {
gk20a_warn(dev_from_gk20a(c->g),
"out of job ringbuffer space\n");
err = -EAGAIN;
}
} else {
*job_out = kzalloc(sizeof(struct channel_gk20a_job),
GFP_KERNEL);
if (!*job_out)
err = -ENOMEM;
}
return err;
}
static void channel_gk20a_free_job(struct channel_gk20a *c,
struct channel_gk20a_job *job)
{
/*
* In case of pre_allocated jobs, we need to clean out
* the job but maintain the pointers to the priv_cmd_entry,
* since they're inherently tied to the job node.
*/
if (channel_gk20a_is_prealloc_enabled(c)) {
struct priv_cmd_entry *wait_cmd = job->wait_cmd;
struct priv_cmd_entry *incr_cmd = job->incr_cmd;
memset(job, 0, sizeof(*job));
job->wait_cmd = wait_cmd;
job->incr_cmd = incr_cmd;
} else
kfree(job);
}
void channel_gk20a_joblist_lock(struct channel_gk20a *c)
{
if (channel_gk20a_is_prealloc_enabled(c))
mutex_lock(&c->joblist.pre_alloc.read_lock);
else
spin_lock(&c->joblist.dynamic.lock);
}
void channel_gk20a_joblist_unlock(struct channel_gk20a *c)
{
if (channel_gk20a_is_prealloc_enabled(c))
mutex_unlock(&c->joblist.pre_alloc.read_lock);
else
spin_unlock(&c->joblist.dynamic.lock);
}
static struct channel_gk20a_job *channel_gk20a_joblist_peek(
struct channel_gk20a *c)
{
int get;
struct channel_gk20a_job *job = NULL;
if (channel_gk20a_is_prealloc_enabled(c)) {
if (!channel_gk20a_joblist_is_empty(c)) {
get = c->joblist.pre_alloc.get;
job = &c->joblist.pre_alloc.jobs[get];
}
} else {
if (!list_empty(&c->joblist.dynamic.jobs))
job = list_first_entry(&c->joblist.dynamic.jobs,
struct channel_gk20a_job, list);
}
return job;
}
static void channel_gk20a_joblist_add(struct channel_gk20a *c,
struct channel_gk20a_job *job)
{
if (channel_gk20a_is_prealloc_enabled(c)) {
c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) %
(c->joblist.pre_alloc.length);
} else {
list_add_tail(&job->list, &c->joblist.dynamic.jobs);
}
}
static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
struct channel_gk20a_job *job)
{
if (channel_gk20a_is_prealloc_enabled(c)) {
c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) %
(c->joblist.pre_alloc.length);
} else {
list_del_init(&job->list);
}
}
bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c)
{
if (channel_gk20a_is_prealloc_enabled(c)) {
int get = c->joblist.pre_alloc.get;
int put = c->joblist.pre_alloc.put;
return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length));
}
return list_empty(&c->joblist.dynamic.jobs);
}
bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
{
bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
rmb();
return pre_alloc_enabled;
}
static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
unsigned int num_jobs)
{
int i, err;
size_t size;
struct priv_cmd_entry *entries = NULL;
if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs)
return -EINVAL;
/*
* pre-allocate the job list.
* since vmalloc takes an unsigned long, we need
* to make sure we don't hit an overflow condition
*/
size = sizeof(struct channel_gk20a_job);
if (num_jobs <= ULONG_MAX / size)
c->joblist.pre_alloc.jobs = vzalloc(num_jobs * size);
if (!c->joblist.pre_alloc.jobs) {
err = -ENOMEM;
goto clean_up;
}
/*
* pre-allocate 2x priv_cmd_entry for each job up front.
* since vmalloc takes an unsigned long, we need
* to make sure we don't hit an overflow condition
*/
size = sizeof(struct priv_cmd_entry);
if (num_jobs <= ULONG_MAX / (size << 1))
entries = vzalloc((num_jobs << 1) * size);
if (!entries) {
err = -ENOMEM;
goto clean_up_joblist;
}
for (i = 0; i < num_jobs; i++) {
c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i];
c->joblist.pre_alloc.jobs[i].incr_cmd =
&entries[i + num_jobs];
}
/* pre-allocate a fence pool */
err = gk20a_alloc_fence_pool(c, num_jobs);
if (err)
goto clean_up_priv_cmd;
c->joblist.pre_alloc.length = num_jobs;
/*
* commit the previous writes before setting the flag.
* see corresponding rmb in channel_gk20a_is_prealloc_enabled()
*/
wmb();
c->joblist.pre_alloc.enabled = true;
return 0;
clean_up_priv_cmd:
vfree(entries);
clean_up_joblist:
vfree(c->joblist.pre_alloc.jobs);
clean_up:
memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
return err;
}
static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)
{
vfree(c->joblist.pre_alloc.jobs[0].wait_cmd);
vfree(c->joblist.pre_alloc.jobs);
gk20a_free_fence_pool(c);
/*
* commit the previous writes before disabling the flag.
* see corresponding rmb in channel_gk20a_is_prealloc_enabled()
*/
wmb();
c->joblist.pre_alloc.enabled = false;
}
int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
struct nvgpu_alloc_gpfifo_args *args)
struct nvgpu_alloc_gpfifo_ex_args *args)
{
struct gk20a *g = c->g;
struct device *d = dev_from_gk20a(g);
@@ -1539,19 +1776,30 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
/* TBD: setup engine contexts */
err = channel_gk20a_alloc_priv_cmdbuf(c);
if (args->num_inflight_jobs) {
err = channel_gk20a_prealloc_resources(c,
args->num_inflight_jobs);
if (err)
goto clean_up_sync;
}
err = channel_gk20a_alloc_priv_cmdbuf(c);
if (err)
goto clean_up_prealloc;
err = channel_gk20a_update_runlist(c, true);
if (err)
goto clean_up_sync;
goto clean_up_priv_cmd;
g->ops.fifo.bind_channel(c);
gk20a_dbg_fn("done");
return 0;
clean_up_priv_cmd:
channel_gk20a_free_priv_cmdbuf(c);
clean_up_prealloc:
channel_gk20a_free_prealloc_resources(c);
clean_up_sync:
gk20a_channel_sync_destroy(c->sync);
c->sync = NULL;
@@ -1878,6 +2126,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
struct vm_gk20a *vm = c->vm;
struct mapped_buffer_node **mapped_buffers = NULL;
int err = 0, num_mapped_buffers = 0;
bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
/* job needs reference to this vm (released in channel_update) */
gk20a_vm_get(vm);
@@ -1898,9 +2147,19 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
gk20a_channel_timeout_start(c, job);
spin_lock(&c->jobs_lock);
list_add_tail(&job->list, &c->jobs);
spin_unlock(&c->jobs_lock);
if (!pre_alloc_enabled)
channel_gk20a_joblist_lock(c);
/*
* ensure all pending write complete before adding to the list.
* see corresponding rmb in gk20a_channel_clean_up_jobs() &
* gk20a_channel_abort_clean_up()
*/
wmb();
channel_gk20a_joblist_add(c, job);
if (!pre_alloc_enabled)
channel_gk20a_joblist_unlock(c);
} else {
err = -ETIMEDOUT;
goto err_put_buffers;
@@ -1945,14 +2204,20 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
while (1) {
bool completed;
spin_lock(&c->jobs_lock);
if (list_empty(&c->jobs)) {
spin_unlock(&c->jobs_lock);
channel_gk20a_joblist_lock(c);
if (channel_gk20a_joblist_is_empty(c)) {
channel_gk20a_joblist_unlock(c);
break;
}
job = list_first_entry(&c->jobs,
struct channel_gk20a_job, list);
spin_unlock(&c->jobs_lock);
/*
* ensure that all subsequent reads occur after checking
* that we have a valid node. see corresponding wmb in
* gk20a_channel_add_job().
*/
rmb();
job = channel_gk20a_joblist_peek(c);
channel_gk20a_joblist_unlock(c);
completed = gk20a_fence_is_expired(job->post_fence);
if (!completed) {
@@ -1998,9 +2263,14 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
* so this wouldn't get freed here. */
gk20a_channel_put(c);
spin_lock(&c->jobs_lock);
list_del_init(&job->list);
spin_unlock(&c->jobs_lock);
/*
* ensure all pending writes complete before deleting the node.
* see corresponding rmb in channel_gk20a_alloc_job().
*/
wmb();
channel_gk20a_joblist_lock(c);
channel_gk20a_joblist_delete(c, job);
channel_gk20a_joblist_unlock(c);
channel_gk20a_free_job(c, job);
job_finished = 1;
@@ -2160,6 +2430,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
int wait_fence_fd = -1;
int err = 0;
bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
/*
* If user wants to always allocate sync_fence_fds then respect that;
@@ -2197,9 +2468,10 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
* this condition.
*/
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
job->pre_fence = gk20a_alloc_fence(c);
if (!pre_alloc_enabled)
job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry),
GFP_KERNEL);
job->pre_fence = gk20a_alloc_fence(c);
if (!job->wait_cmd || !job->pre_fence) {
err = -ENOMEM;
@@ -2233,8 +2505,10 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
* is used to keep track of method completion for idle railgating. The
* sync_pt/semaphore PB is added to the GPFIFO later on in submit.
*/
job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
job->post_fence = gk20a_alloc_fence(c);
if (!pre_alloc_enabled)
job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry),
GFP_KERNEL);
if (!job->incr_cmd || !job->post_fence) {
err = -ENOMEM;
@@ -2256,15 +2530,17 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
return 0;
clean_up_post_fence:
gk20a_free_priv_cmdbuf(c, job->incr_cmd);
gk20a_fence_put(job->post_fence);
job->incr_cmd = NULL;
job->post_fence = NULL;
free_priv_cmdbuf(c, job->incr_cmd);
if (!pre_alloc_enabled)
job->incr_cmd = NULL;
clean_up_pre_fence:
gk20a_free_priv_cmdbuf(c, job->wait_cmd);
gk20a_fence_put(job->pre_fence);
job->wait_cmd = NULL;
job->pre_fence = NULL;
free_priv_cmdbuf(c, job->wait_cmd);
if (!pre_alloc_enabled)
job->wait_cmd = NULL;
*wait_cmd = NULL;
*pre_fence = NULL;
fail:
@@ -2388,11 +2664,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
}
if (need_job_tracking) {
job = channel_gk20a_alloc_job(c);
if (!job) {
err = -ENOMEM;
err = channel_gk20a_alloc_job(c, &job);
if (err)
goto clean_up;
}
err = gk20a_submit_prepare_syncs(c, fence, job,
&wait_cmd, &incr_cmd,
@@ -2463,13 +2737,14 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
init_waitqueue_head(&c->ref_count_dec_wq);
mutex_init(&c->ioctl_lock);
mutex_init(&c->error_notifier_mutex);
spin_lock_init(&c->jobs_lock);
spin_lock_init(&c->joblist.dynamic.lock);
mutex_init(&c->joblist.pre_alloc.read_lock);
raw_spin_lock_init(&c->timeout.lock);
mutex_init(&c->sync_lock);
INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
mutex_init(&c->clean_up.lock);
INIT_LIST_HEAD(&c->jobs);
INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
#if defined(CONFIG_GK20A_CYCLE_STATS)
mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
mutex_init(&c->cs_client_mutex);
@@ -3119,7 +3394,7 @@ long gk20a_channel_ioctl(struct file *filp,
(struct nvgpu_free_obj_ctx_args *)buf);
gk20a_idle(dev);
break;
case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX:
err = gk20a_busy(dev);
if (err) {
dev_err(dev,
@@ -3128,9 +3403,34 @@ long gk20a_channel_ioctl(struct file *filp,
break;
}
err = gk20a_alloc_channel_gpfifo(ch,
(struct nvgpu_alloc_gpfifo_args *)buf);
(struct nvgpu_alloc_gpfifo_ex_args *)buf);
gk20a_idle(dev);
break;
case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
{
struct nvgpu_alloc_gpfifo_ex_args alloc_gpfifo_ex_args;
struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args =
(struct nvgpu_alloc_gpfifo_args *)buf;
err = gk20a_busy(dev);
if (err) {
dev_err(dev,
"%s: failed to host gk20a for ioctl cmd: 0x%x",
__func__, cmd);
break;
}
/* prepare new args structure */
memset(&alloc_gpfifo_ex_args, 0,
sizeof(struct nvgpu_alloc_gpfifo_ex_args));
alloc_gpfifo_ex_args.num_entries =
alloc_gpfifo_args->num_entries;
alloc_gpfifo_ex_args.flags = alloc_gpfifo_args->flags;
err = gk20a_alloc_channel_gpfifo(ch, &alloc_gpfifo_ex_args);
gk20a_idle(dev);
break;
}
case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
err = gk20a_ioctl_channel_submit_gpfifo(ch,
(struct nvgpu_submit_gpfifo_args *)buf);
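
To recap the scheme above: in the pre-allocated case the submitter only ever
advances put and the clean-up worker only ever advances get, so the two sides
do not contend on a lock; a wmb() on the writing side pairs with an rmb() on
the reading side. Below is a condensed, hypothetical sketch of that
single-producer/single-consumer pattern (names such as demo_ring are not from
the driver; note that CIRC_SPACE()/CIRC_CNT() from <linux/circ_buf.h> assume
a power-of-two length).

/*
 * Condensed sketch of the single-producer/single-consumer ring used for
 * pre-allocated job tracking. Hypothetical names; the real logic lives in
 * channel_gk20a_alloc_job(), channel_gk20a_joblist_add()/delete() and
 * gk20a_channel_clean_up_jobs() above. CIRC_SPACE()/CIRC_CNT() assume the
 * length is a power of two.
 */
#include <linux/circ_buf.h>
#include <linux/errno.h>
#include <asm/barrier.h>

struct demo_ring {
	unsigned int put;	/* advanced only by the submitter (producer) */
	unsigned int get;	/* advanced only by the clean-up worker (consumer) */
	unsigned int length;	/* number of slots, power of two */
};

static int demo_ring_produce(struct demo_ring *r)
{
	unsigned int put = r->put;
	unsigned int get = r->get;

	/* read the slot state only after get; pairs with the consumer's wmb() */
	rmb();
	if (!CIRC_SPACE(put, get, r->length))
		return -EAGAIN;

	/* ... fill the slot at index put ... */

	/* publish the slot contents before advancing put */
	wmb();
	r->put = (put + 1) % r->length;
	return 0;
}

static int demo_ring_consume(struct demo_ring *r)
{
	unsigned int put = r->put;
	unsigned int get = r->get;

	/* read the slot contents only after put; pairs with the producer's wmb() */
	rmb();
	if (!CIRC_CNT(put, get, r->length))
		return -EAGAIN;

	/* ... process and recycle the slot at index get ... */

	/* complete any writes that recycle the slot before advancing get */
	wmb();
	r->get = (get + 1) % r->length;
	return 0;
}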


@@ -70,6 +70,22 @@ struct channel_gk20a_job {
struct list_head list;
};
struct channel_gk20a_joblist {
struct {
bool enabled;
unsigned int length;
unsigned int put;
unsigned int get;
struct channel_gk20a_job *jobs;
struct mutex read_lock;
} pre_alloc;
struct {
struct list_head jobs;
spinlock_t lock;
} dynamic;
};
struct channel_gk20a_timeout {
struct delayed_work wq;
raw_spinlock_t lock;
@@ -115,6 +131,7 @@ struct channel_gk20a {
bool bound;
bool first_init;
bool vpr;
bool no_block;
bool cde;
pid_t pid;
pid_t tgid;
@@ -123,8 +140,8 @@ struct channel_gk20a {
int tsgid;
struct list_head ch_entry; /* channel's entry in TSG */
struct list_head jobs;
spinlock_t jobs_lock;
struct channel_gk20a_joblist joblist;
struct gk20a_allocator fence_allocator;
struct vm_gk20a *vm;
@@ -272,7 +289,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
bool force_need_sync_fence);
int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
struct nvgpu_alloc_gpfifo_args *args);
struct nvgpu_alloc_gpfifo_ex_args *args);
void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
void channel_gk20a_disable(struct channel_gk20a *ch);
@@ -284,6 +301,11 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
void channel_gk20a_enable(struct channel_gk20a *ch);
void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
void channel_gk20a_joblist_lock(struct channel_gk20a *c);
void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);
int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
int timeslice_period,
int *__timeslice_timeout, int *__timeslice_scale);


@@ -47,6 +47,11 @@ static void gk20a_fence_free(struct kref *ref)
#endif
if (f->semaphore)
gk20a_semaphore_put(f->semaphore);
if (f->allocator) {
if (gk20a_alloc_initialized(f->allocator))
gk20a_free(f->allocator, (u64)f);
} else
kfree(f);
}
@@ -109,15 +114,66 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
#endif
}
int gk20a_alloc_fence_pool(struct channel_gk20a *c, int count)
{
int err;
size_t size;
struct gk20a_fence *fence_pool = NULL;
size = sizeof(struct gk20a_fence);
if (count <= ULONG_MAX / size) {
size = count * size;
fence_pool = vzalloc(size);
}
if (!fence_pool)
return -ENOMEM;
err = gk20a_lockless_allocator_init(&c->fence_allocator,
"fence_pool", (u64)fence_pool, size,
sizeof(struct gk20a_fence), 0);
if (err)
goto fail;
return 0;
fail:
vfree(fence_pool);
return err;
}
void gk20a_free_fence_pool(struct channel_gk20a *c)
{
if (gk20a_alloc_initialized(&c->fence_allocator)) {
void *base = (void *)gk20a_alloc_base(&c->fence_allocator);
gk20a_alloc_destroy(&c->fence_allocator);
vfree(base);
}
}
struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
{
struct gk20a_fence *fence;
struct gk20a_fence *fence = NULL;
if (channel_gk20a_is_prealloc_enabled(c)) {
if (gk20a_alloc_initialized(&c->fence_allocator)) {
fence = (struct gk20a_fence *)
gk20a_alloc(&c->fence_allocator,
sizeof(struct gk20a_fence));
/* clear the node and reset the allocator pointer */
if (fence) {
memset(fence, 0, sizeof(*fence));
fence->allocator = &c->fence_allocator;
}
}
} else
fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL);
if (!fence)
return NULL;
if (fence)
kref_init(&fence->ref);
return fence;
}
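
The fence pool above follows a common pooled-object pattern: carve all fences
for a channel out of one vzalloc'd block, hand them out through an allocator,
and record the owning allocator in each fence so the release path knows
whether to return the fence to the pool or kfree() it. A simplified,
hypothetical sketch of that pattern follows; a bitmap stands in for the
driver's lockless allocator, and all names are illustrative.

/*
 * Simplified sketch of the pooled-object pattern used by the fence pool.
 * Hypothetical names; a bitmap stands in for gk20a's lockless allocator.
 */
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/string.h>

struct demo_obj;

struct demo_pool {
	struct demo_obj *slots;		/* one pre-allocated block of objects */
	unsigned long bitmap;		/* 1 bit per slot; assumes <= BITS_PER_LONG slots */
	unsigned int count;
};

struct demo_obj {
	struct demo_pool *pool;		/* NULL when individually kzalloc'd */
	/* ... payload ... */
};

static struct demo_obj *demo_obj_get(struct demo_pool *p)
{
	unsigned int i;

	if (!p)		/* no pool: fall back to dynamic allocation */
		return kzalloc(sizeof(struct demo_obj), GFP_KERNEL);

	for (i = 0; i < p->count; i++) {
		if (!test_and_set_bit(i, &p->bitmap)) {
			memset(&p->slots[i], 0, sizeof(p->slots[i]));
			p->slots[i].pool = p;	/* remember where to return it */
			return &p->slots[i];
		}
	}
	return NULL;	/* pool exhausted */
}

static void demo_obj_put(struct demo_obj *obj)
{
	if (obj->pool)
		clear_bit(obj - obj->pool->slots, &obj->pool->bitmap);
	else
		kfree(obj);
}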


@@ -3,7 +3,7 @@
*
* GK20A Fences
*
* Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -45,6 +45,9 @@ struct gk20a_fence {
struct platform_device *host1x_pdev;
u32 syncpt_id;
u32 syncpt_value;
/* Valid for fences part of a pre-allocated fence pool */
struct gk20a_allocator *allocator;
};
/* Fences can be created from semaphores or syncpoint (id, value) pairs */
@@ -62,7 +65,15 @@ int gk20a_fence_from_syncpt(
u32 id, u32 value, bool wfi,
bool need_sync_fence);
struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c);
int gk20a_alloc_fence_pool(
struct channel_gk20a *c,
int size);
void gk20a_free_fence_pool(
struct channel_gk20a *c);
struct gk20a_fence *gk20a_alloc_fence(
struct channel_gk20a *c);
void gk20a_init_fence(struct gk20a_fence *f,
const struct gk20a_fence_ops *ops,


@@ -963,7 +963,13 @@ struct nvgpu_alloc_gpfifo_args {
__u32 num_entries;
#define NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED (1 << 0) /* set owner channel of this gpfifo as a vpr channel */
__u32 flags;
};
struct nvgpu_alloc_gpfifo_ex_args {
__u32 num_entries;
__u32 num_inflight_jobs;
__u32 flags;
__u32 reserved[5];
};
struct gk20a_sync_pt_info {
@@ -1182,6 +1188,8 @@ struct nvgpu_preemption_mode_args {
_IOWR(NVGPU_IOCTL_MAGIC, 18, struct nvgpu_set_timeout_ex_args)
#define NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO \
_IOW(NVGPU_IOCTL_MAGIC, 100, struct nvgpu_alloc_gpfifo_args)
#define NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX \
_IOW(NVGPU_IOCTL_MAGIC, 101, struct nvgpu_alloc_gpfifo_ex_args)
#define NVGPU_IOCTL_CHANNEL_WAIT \
_IOWR(NVGPU_IOCTL_MAGIC, 102, struct nvgpu_wait_args)
#define NVGPU_IOCTL_CHANNEL_CYCLE_STATS \