mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: add support for pre-allocated resources
Add support for pre-allocation of job tracking resources with a new
(extended) ioctl. The goal is to avoid dynamic memory allocation in the
submit path. This patch does the following:

1) Introduces a new ioctl, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX, which
   enables pre-allocation of tracking resources per job:
   a) 2x priv_cmd_entry
   b) 2x gk20a_fence
2) Implements a circular ring buffer for job tracking to avoid lock
   contention between the producer (submitter) and consumer (clean-up).

Bug 1795076

Change-Id: I6b52e5c575871107ff380f9a5790f440a6969347
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1203300
(cherry picked from commit 9fd270c22b860935dffe244753dabd87454bef39)
Reviewed-on: http://git-master/r/1223934
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
committed by: mobile promotions
parent: 63e8592e06
commit: 733fb79b39
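The commit message above describes the new extended allocation ioctl; the sketch below illustrates how a userspace client could opt into pre-allocated job tracking with it. This is a minimal sketch, not part of the patch: the channel fd handling, the header path and the particular counts are assumptions, while the struct and ioctl name come from the UAPI hunk at the end of this diff. Passing num_inflight_jobs = 0 keeps the old, dynamically allocated behaviour.

/*
 * Minimal userspace sketch (not from the patch): request a gpfifo with
 * pre-allocated job tracking on an already-open nvgpu channel fd.
 * The entry/job counts and the header path are illustrative assumptions.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>        /* assumed location of the nvgpu UAPI header */

int alloc_gpfifo_with_prealloc(int channel_fd)
{
    struct nvgpu_alloc_gpfifo_ex_args args = {0};

    args.num_entries = 128;      /* gpfifo depth */
    args.num_inflight_jobs = 32; /* ask the kernel to pre-allocate job
                                    tracking resources for 32 jobs */
    args.flags = 0;

    if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX, &args) < 0) {
        perror("NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX");
        return -1;
    }
    return 0;
}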
@@ -1126,9 +1126,9 @@ __releases(&cde_app->mutex)
 struct gk20a_cde_app *cde_app = &g->cde_app;
 bool channel_idle;

-spin_lock(&ch->jobs_lock);
-channel_idle = list_empty(&ch->jobs);
-spin_unlock(&ch->jobs_lock);
+channel_gk20a_joblist_lock(ch);
+channel_idle = channel_gk20a_joblist_is_empty(ch);
+channel_gk20a_joblist_unlock(ch);

 if (!channel_idle)
 return;

@@ -1207,7 +1207,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)

 /* allocate gpfifo (1024 should be more than enough) */
 err = gk20a_alloc_channel_gpfifo(ch,
-&(struct nvgpu_alloc_gpfifo_args){1024, 0});
+&(struct nvgpu_alloc_gpfifo_ex_args){1024, 0, 0, {}});
 if (err) {
 gk20a_warn(cde_ctx->dev, "cde: unable to allocate gpfifo");
 goto err_alloc_gpfifo;

@@ -126,9 +126,9 @@ static void gk20a_ce_finished_ctx_cb(struct channel_gk20a *ch, void *data)
 bool channel_idle;
 u32 event;

-spin_lock(&ch->jobs_lock);
-channel_idle = list_empty(&ch->jobs);
-spin_unlock(&ch->jobs_lock);
+channel_gk20a_joblist_lock(ch);
+channel_idle = channel_gk20a_joblist_is_empty(ch);
+channel_gk20a_joblist_unlock(ch);

 if (!channel_idle)
 return;

@@ -462,7 +462,7 @@ u32 gk20a_ce_create_context_with_cb(struct device *dev,

 /* allocate gpfifo (1024 should be more than enough) */
 err = gk20a_alloc_channel_gpfifo(ce_ctx->ch,
-&(struct nvgpu_alloc_gpfifo_args){1024, 0});
+&(struct nvgpu_alloc_gpfifo_ex_args){1024, 0, 0, {}});
 if (err) {
 gk20a_err(ce_ctx->dev, "ce: unable to allocate gpfifo");
 goto end;
@@ -26,6 +26,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/dma-buf.h>
 #include <linux/vmalloc.h>
+#include <linux/circ_buf.h>

 #include "debug_gk20a.h"
 #include "ctxsw_trace_gk20a.h"

@@ -55,6 +56,15 @@ static void free_priv_cmdbuf(struct channel_gk20a *c,
 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);

+static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c);
+
+static void channel_gk20a_joblist_add(struct channel_gk20a *c,
+struct channel_gk20a_job *job);
+static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
+struct channel_gk20a_job *job);
+static struct channel_gk20a_job *channel_gk20a_joblist_peek(
+struct channel_gk20a *c);
+
 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
@@ -460,6 +470,7 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 {
 struct channel_gk20a_job *job, *n;
 bool released_job_semaphore = false;
+bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);

 gk20a_channel_cancel_job_clean_up(ch, true);

@@ -471,14 +482,37 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 /* release all job semaphores (applies only to jobs that use
 semaphore synchronization) */
-spin_lock(&ch->jobs_lock);
-list_for_each_entry_safe(job, n, &ch->jobs, list) {
+channel_gk20a_joblist_lock(ch);
+if (pre_alloc_enabled) {
+int tmp_get = ch->joblist.pre_alloc.get;
+int put = ch->joblist.pre_alloc.put;

+/*
+ * ensure put is read before any subsequent reads.
+ * see corresponding wmb in gk20a_channel_add_job()
+ */
+rmb();

+while (tmp_get != put) {
+job = &ch->joblist.pre_alloc.jobs[tmp_get];
 if (job->post_fence->semaphore) {
-gk20a_semaphore_release(job->post_fence->semaphore);
+gk20a_semaphore_release(
+job->post_fence->semaphore);
 released_job_semaphore = true;
 }
+tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
+}
+} else {
+list_for_each_entry_safe(job, n,
+&ch->joblist.dynamic.jobs, list) {
+if (job->post_fence->semaphore) {
+gk20a_semaphore_release(
+job->post_fence->semaphore);
+released_job_semaphore = true;
+}
+}
-spin_unlock(&ch->jobs_lock);
+}
+channel_gk20a_joblist_unlock(ch);

 if (released_job_semaphore)
 wake_up_interruptible_all(&ch->semaphore_wq);

@@ -511,9 +545,9 @@ int gk20a_wait_channel_idle(struct channel_gk20a *ch)
 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));

 do {
-spin_lock(&ch->jobs_lock);
-channel_idle = list_empty(&ch->jobs);
-spin_unlock(&ch->jobs_lock);
+channel_gk20a_joblist_lock(ch);
+channel_idle = channel_gk20a_joblist_is_empty(ch);
+channel_gk20a_joblist_unlock(ch);
 if (channel_idle)
 break;
@@ -1016,6 +1050,10 @@ unbind:

 mutex_unlock(&g->dbg_sessions_lock);

+/* free pre-allocated resources, if applicable */
+if (channel_gk20a_is_prealloc_enabled(ch))
+channel_gk20a_free_prealloc_resources(ch);
+
 /* make sure we catch accesses of unopened channels in case
 * there's non-refcounted channel pointers hanging around */
 ch->g = NULL;

@@ -1422,7 +1460,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 /* we already handled q->put + size > q->size so BUG_ON this */
 BUG_ON(q->put > q->size);

-/* commit the previous writes before making the entry valid */
+/*
+ * commit the previous writes before making the entry valid.
+ * see the corresponding rmb() in gk20a_free_priv_cmdbuf().
+ */
 wmb();

 e->valid = true;
@@ -1436,26 +1477,222 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 struct priv_cmd_entry *e)
 {
+if (channel_gk20a_is_prealloc_enabled(c))
+memset(e, 0, sizeof(struct priv_cmd_entry));
+else
 kfree(e);
 }

-static struct channel_gk20a_job *channel_gk20a_alloc_job(
-struct channel_gk20a *c)
+static int channel_gk20a_alloc_job(struct channel_gk20a *c,
+struct channel_gk20a_job **job_out)
 {
 struct channel_gk20a_job *job = NULL;
+int err = 0;

-job = kzalloc(sizeof(*job), GFP_KERNEL);
-return job;
+if (channel_gk20a_is_prealloc_enabled(c)) {
+int put = c->joblist.pre_alloc.put;
+int get = c->joblist.pre_alloc.get;

+/*
+ * ensure all subsequent reads happen after reading get.
+ * see corresponding wmb in gk20a_channel_clean_up_jobs()
+ */
+rmb();

+if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length))
+*job_out = &c->joblist.pre_alloc.jobs[put];
+else {
+gk20a_warn(dev_from_gk20a(c->g),
+"out of job ringbuffer space\n");
+err = -EAGAIN;
+}
+} else {
+*job_out = kzalloc(sizeof(struct channel_gk20a_job),
+GFP_KERNEL);
+if (!job_out)
+err = -ENOMEM;
+}

+return err;
 }

 static void channel_gk20a_free_job(struct channel_gk20a *c,
 struct channel_gk20a_job *job)
 {
+/*
+ * In case of pre_allocated jobs, we need to clean out
+ * the job but maintain the pointers to the priv_cmd_entry,
+ * since they're inherently tied to the job node.
+ */
+if (channel_gk20a_is_prealloc_enabled(c)) {
+struct priv_cmd_entry *wait_cmd = job->wait_cmd;
+struct priv_cmd_entry *incr_cmd = job->incr_cmd;
+memset(job, 0, sizeof(*job));
+job->wait_cmd = wait_cmd;
+job->incr_cmd = incr_cmd;
+} else
 kfree(job);
 }

+void channel_gk20a_joblist_lock(struct channel_gk20a *c)
+{
+if (channel_gk20a_is_prealloc_enabled(c))
+mutex_lock(&c->joblist.pre_alloc.read_lock);
+else
+spin_lock(&c->joblist.dynamic.lock);
+}

+void channel_gk20a_joblist_unlock(struct channel_gk20a *c)
+{
+if (channel_gk20a_is_prealloc_enabled(c))
+mutex_unlock(&c->joblist.pre_alloc.read_lock);
+else
+spin_unlock(&c->joblist.dynamic.lock);
+}

+static struct channel_gk20a_job *channel_gk20a_joblist_peek(
+struct channel_gk20a *c)
+{
+int get;
+struct channel_gk20a_job *job = NULL;

+if (channel_gk20a_is_prealloc_enabled(c)) {
+if (!channel_gk20a_joblist_is_empty(c)) {
+get = c->joblist.pre_alloc.get;
+job = &c->joblist.pre_alloc.jobs[get];
+}
+} else {
+if (!list_empty(&c->joblist.dynamic.jobs))
+job = list_first_entry(&c->joblist.dynamic.jobs,
+struct channel_gk20a_job, list);
+}

+return job;
+}

+static void channel_gk20a_joblist_add(struct channel_gk20a *c,
+struct channel_gk20a_job *job)
+{
+if (channel_gk20a_is_prealloc_enabled(c)) {
+c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) %
+(c->joblist.pre_alloc.length);
+} else {
+list_add_tail(&job->list, &c->joblist.dynamic.jobs);
+}
+}

+static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
+struct channel_gk20a_job *job)
+{
+if (channel_gk20a_is_prealloc_enabled(c)) {
+c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) %
+(c->joblist.pre_alloc.length);
+} else {
+list_del_init(&job->list);
+}
+}

+bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c)
+{
+if (channel_gk20a_is_prealloc_enabled(c)) {
+int get = c->joblist.pre_alloc.get;
+int put = c->joblist.pre_alloc.put;
+return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length));
+}

+return list_empty(&c->joblist.dynamic.jobs);
+}

+bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
+{
+bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;

+rmb();
+return pre_alloc_enabled;
+}
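The joblist helpers above are the commit's circular ring buffer: jobs live in a pre-allocated array, the submitter advances put, the clean-up worker advances get, and emptiness/space are derived from the two cursors (via CIRC_CNT()/CIRC_SPACE() from <linux/circ_buf.h> in the driver). The standalone sketch below spells out the same cursor arithmetic with illustrative names; it models the idea and is not driver code.

/*
 * Standalone model of the job ring buffer used above. The producer
 * (submit path) owns "put", the consumer (clean-up worker) owns "get",
 * and one slot is left unused so that put == get always means "empty"
 * (mirroring the CIRC_SPACE() convention). Names are illustrative.
 */
#include <stdbool.h>

struct job_ring {
    unsigned int put;    /* next slot handed to the submitter */
    unsigned int get;    /* oldest slot not yet cleaned up */
    unsigned int length; /* number of pre-allocated slots */
};

static bool ring_empty(const struct job_ring *r)
{
    return r->put == r->get;
}

static bool ring_full(const struct job_ring *r)
{
    return (r->put + 1) % r->length == r->get;
}

/* producer: claim the slot at "put", fill it, then advance the cursor */
static int ring_push(struct job_ring *r)
{
    if (ring_full(r))
        return -1;                 /* maps to -EAGAIN in the driver */
    r->put = (r->put + 1) % r->length;
    return 0;
}

/* consumer: retire the oldest slot and advance "get" */
static int ring_pop(struct job_ring *r)
{
    if (ring_empty(r))
        return -1;
    r->get = (r->get + 1) % r->length;
    return 0;
}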
+static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
+unsigned int num_jobs)
+{
+int i, err;
+size_t size;
+struct priv_cmd_entry *entries = NULL;

+if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs)
+return -EINVAL;

+/*
+ * pre-allocate the job list.
+ * since vmalloc take in an unsigned long, we need
+ * to make sure we don't hit an overflow condition
+ */
+size = sizeof(struct channel_gk20a_job);
+if (num_jobs <= ULONG_MAX / size)
+c->joblist.pre_alloc.jobs = vzalloc(num_jobs * size);
+if (!c->joblist.pre_alloc.jobs) {
+err = -ENOMEM;
+goto clean_up;
+}

+/*
+ * pre-allocate 2x priv_cmd_entry for each job up front.
+ * since vmalloc take in an unsigned long, we need
+ * to make sure we don't hit an overflow condition
+ */
+size = sizeof(struct priv_cmd_entry);
+if (num_jobs <= ULONG_MAX / (size << 1))
+entries = vzalloc((num_jobs << 1) * size);
+if (!entries) {
+err = -ENOMEM;
+goto clean_up_joblist;
+}

+for (i = 0; i < num_jobs; i++) {
+c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i];
+c->joblist.pre_alloc.jobs[i].incr_cmd =
+&entries[i + num_jobs];
+}

+/* pre-allocate a fence pool */
+err = gk20a_alloc_fence_pool(c, num_jobs);
+if (err)
+goto clean_up_priv_cmd;

+c->joblist.pre_alloc.length = num_jobs;

+/*
+ * commit the previous writes before setting the flag.
+ * see corresponding rmb in channel_gk20a_is_prealloc_enabled()
+ */
+wmb();
+c->joblist.pre_alloc.enabled = true;

+return 0;

+clean_up_priv_cmd:
+vfree(entries);
+clean_up_joblist:
+vfree(c->joblist.pre_alloc.jobs);
+clean_up:
+memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
+return err;
+}

+static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)
+{
+vfree(c->joblist.pre_alloc.jobs[0].wait_cmd);
+vfree(c->joblist.pre_alloc.jobs);
+gk20a_free_fence_pool(c);

+/*
+ * commit the previous writes before disabling the flag.
+ * see corresponding rmb in channel_gk20a_is_prealloc_enabled()
+ */
+wmb();
+c->joblist.pre_alloc.enabled = false;
+}
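channel_gk20a_prealloc_resources() above checks every count-times-size computation against ULONG_MAX before calling vzalloc(), so a huge num_inflight_jobs cannot wrap the allocation size. The short userspace analogue below shows the same guard; the helper name is made up for illustration.

/*
 * Overflow-guarded array allocation, modelled on the checks above.
 * The element count is validated against ULONG_MAX / element_size
 * before the multiplication that sizes the buffer.
 */
#include <limits.h>
#include <stdlib.h>

static void *alloc_array_checked(size_t nmemb, size_t size)
{
    if (size != 0 && nmemb > ULONG_MAX / size)
        return NULL;              /* would overflow: refuse, don't wrap */
    return calloc(nmemb, size);   /* calloc zeroes, like vzalloc() */
}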
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
-struct nvgpu_alloc_gpfifo_args *args)
+struct nvgpu_alloc_gpfifo_ex_args *args)
 {
 struct gk20a *g = c->g;
 struct device *d = dev_from_gk20a(g);

@@ -1539,19 +1776,30 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,

 /* TBD: setup engine contexts */

-err = channel_gk20a_alloc_priv_cmdbuf(c);
+if (args->num_inflight_jobs) {
+err = channel_gk20a_prealloc_resources(c,
+args->num_inflight_jobs);
 if (err)
 goto clean_up_sync;
+}

+err = channel_gk20a_alloc_priv_cmdbuf(c);
 if (err)
+goto clean_up_prealloc;

 err = channel_gk20a_update_runlist(c, true);
 if (err)
-goto clean_up_sync;
+goto clean_up_priv_cmd;

 g->ops.fifo.bind_channel(c);

 gk20a_dbg_fn("done");
 return 0;

+clean_up_priv_cmd:
+channel_gk20a_free_priv_cmdbuf(c);
+clean_up_prealloc:
+channel_gk20a_free_prealloc_resources(c);
 clean_up_sync:
 gk20a_channel_sync_destroy(c->sync);
 c->sync = NULL;
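The reworked setup path above unwinds in reverse order on failure, with one label per stage (clean_up_priv_cmd, clean_up_prealloc, clean_up_sync) so that a failure at any step frees exactly what was already set up. The generic sketch below shows that goto-unwind idiom with stand-in stage names; it is illustrative, not driver code.

/*
 * Generic goto-unwind sketch: labels are ordered so that jumping to the
 * label for stage N tears down stages N-1..1 and nothing else. The
 * setup/free stubs are placeholders, not driver functions.
 */
#include <stdio.h>

static int setup_sync(void)     { return 0; }
static int setup_prealloc(void) { return 0; }
static int setup_cmdbuf(void)   { return -1; }  /* pretend this stage fails */

static void free_prealloc(void) { puts("undo prealloc"); }
static void free_sync(void)     { puts("undo sync"); }

static int bring_up(void)
{
    int err;

    err = setup_sync();
    if (err)
        goto fail;
    err = setup_prealloc();
    if (err)
        goto clean_up_sync;
    err = setup_cmdbuf();
    if (err)
        goto clean_up_prealloc;
    return 0;

clean_up_prealloc:
    free_prealloc();
clean_up_sync:
    free_sync();
fail:
    return err;
}

int main(void)
{
    return bring_up() ? 1 : 0;
}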
@@ -1878,6 +2126,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 struct vm_gk20a *vm = c->vm;
 struct mapped_buffer_node **mapped_buffers = NULL;
 int err = 0, num_mapped_buffers = 0;
+bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);

 /* job needs reference to this vm (released in channel_update) */
 gk20a_vm_get(vm);

@@ -1898,9 +2147,19 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,

 gk20a_channel_timeout_start(c, job);

-spin_lock(&c->jobs_lock);
-list_add_tail(&job->list, &c->jobs);
-spin_unlock(&c->jobs_lock);
+if (!pre_alloc_enabled)
+channel_gk20a_joblist_lock(c);

+/*
+ * ensure all pending write complete before adding to the list.
+ * see corresponding rmb in gk20a_channel_clean_up_jobs() &
+ * gk20a_channel_abort_clean_up()
+ */
+wmb();
+channel_gk20a_joblist_add(c, job);

+if (!pre_alloc_enabled)
+channel_gk20a_joblist_unlock(c);
 } else {
 err = -ETIMEDOUT;
 goto err_put_buffers;

@@ -1945,14 +2204,20 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 while (1) {
 bool completed;

-spin_lock(&c->jobs_lock);
-if (list_empty(&c->jobs)) {
-spin_unlock(&c->jobs_lock);
+channel_gk20a_joblist_lock(c);
+if (channel_gk20a_joblist_is_empty(c)) {
+channel_gk20a_joblist_unlock(c);
 break;
 }
-job = list_first_entry(&c->jobs,
-struct channel_gk20a_job, list);
-spin_unlock(&c->jobs_lock);

+/*
+ * ensure that all subsequent reads occur after checking
+ * that we have a valid node. see corresponding wmb in
+ * gk20a_channel_add_job().
+ */
+rmb();
+job = channel_gk20a_joblist_peek(c);
+channel_gk20a_joblist_unlock(c);

 completed = gk20a_fence_is_expired(job->post_fence);
 if (!completed) {

@@ -1998,9 +2263,14 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 * so this wouldn't get freed here. */
 gk20a_channel_put(c);

-spin_lock(&c->jobs_lock);
-list_del_init(&job->list);
-spin_unlock(&c->jobs_lock);
+/*
+ * ensure all pending writes complete before deleting the node.
+ * see corresponding rmb in channel_gk20a_alloc_job().
+ */
+wmb();
+channel_gk20a_joblist_lock(c);
+channel_gk20a_joblist_delete(c, job);
+channel_gk20a_joblist_unlock(c);

 channel_gk20a_free_job(c, job);
 job_finished = 1;
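In the hunks above, the producer side (gk20a_channel_add_job()) issues wmb() before publishing a job through channel_gk20a_joblist_add(), and the consumers (gk20a_channel_clean_up_jobs() and gk20a_channel_abort_clean_up()) issue rmb() after reading the cursor and before touching the slot. The userspace sketch below models that pairing with C11 release/acquire atomics, which give the equivalent ordering guarantee; it is an analogy, not the kernel barrier primitives.

/*
 * Single-producer/single-consumer ring with explicit publication ordering,
 * as a userspace analogue of the wmb()/rmb() pairing above: the slot is
 * filled first, then the cursor is released; the reader acquires the
 * cursor before dereferencing the slot. Names are illustrative.
 */
#include <stdatomic.h>

#define RING_LEN 8u

struct slot { int payload; };

static struct slot ring[RING_LEN];
static atomic_uint put;   /* producer cursor, free-running */
static atomic_uint get;   /* consumer cursor, free-running */

static int produce(int value)
{
    unsigned int p = atomic_load_explicit(&put, memory_order_relaxed);
    unsigned int g = atomic_load_explicit(&get, memory_order_acquire);

    if (p - g == RING_LEN)
        return 0;                                  /* ring full */
    ring[p % RING_LEN].payload = value;            /* fill the slot ... */
    atomic_store_explicit(&put, p + 1,
                          memory_order_release);   /* ... then publish it */
    return 1;
}

static int consume(int *out)
{
    unsigned int g = atomic_load_explicit(&get, memory_order_relaxed);
    unsigned int p = atomic_load_explicit(&put, memory_order_acquire);

    if (g == p)
        return 0;                                  /* nothing published */
    *out = ring[g % RING_LEN].payload;             /* safe after acquire */
    atomic_store_explicit(&get, g + 1, memory_order_release);
    return 1;
}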
@@ -2160,6 +2430,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 int wait_fence_fd = -1;
 int err = 0;
 bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
+bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);

 /*
 * If user wants to always allocate sync_fence_fds then respect that;

@@ -2197,9 +2468,10 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 * this condition.
 */
 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
-job->pre_fence = gk20a_alloc_fence(c);
+if (!pre_alloc_enabled)
 job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry),
 GFP_KERNEL);
+job->pre_fence = gk20a_alloc_fence(c);

 if (!job->wait_cmd || !job->pre_fence) {
 err = -ENOMEM;

@@ -2233,8 +2505,10 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 * is used to keep track of method completion for idle railgating. The
 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
 */
-job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
 job->post_fence = gk20a_alloc_fence(c);
+if (!pre_alloc_enabled)
+job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry),
+GFP_KERNEL);

 if (!job->incr_cmd || !job->post_fence) {
 err = -ENOMEM;

@@ -2256,15 +2530,17 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 return 0;

 clean_up_post_fence:
-gk20a_free_priv_cmdbuf(c, job->incr_cmd);
 gk20a_fence_put(job->post_fence);
-job->incr_cmd = NULL;
 job->post_fence = NULL;
+free_priv_cmdbuf(c, job->incr_cmd);
+if (!pre_alloc_enabled)
+job->incr_cmd = NULL;
 clean_up_pre_fence:
-gk20a_free_priv_cmdbuf(c, job->wait_cmd);
 gk20a_fence_put(job->pre_fence);
-job->wait_cmd = NULL;
 job->pre_fence = NULL;
+free_priv_cmdbuf(c, job->wait_cmd);
+if (!pre_alloc_enabled)
+job->wait_cmd = NULL;
 *wait_cmd = NULL;
 *pre_fence = NULL;
 fail:
@@ -2388,11 +2664,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 }

 if (need_job_tracking) {
-job = channel_gk20a_alloc_job(c);
-if (!job) {
-err = -ENOMEM;
+err = channel_gk20a_alloc_job(c, &job);
+if (err)
 goto clean_up;
-}

 err = gk20a_submit_prepare_syncs(c, fence, job,
 &wait_cmd, &incr_cmd,
@@ -2463,13 +2737,14 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 init_waitqueue_head(&c->ref_count_dec_wq);
 mutex_init(&c->ioctl_lock);
 mutex_init(&c->error_notifier_mutex);
-spin_lock_init(&c->jobs_lock);
+spin_lock_init(&c->joblist.dynamic.lock);
+mutex_init(&c->joblist.pre_alloc.read_lock);
 raw_spin_lock_init(&c->timeout.lock);
 mutex_init(&c->sync_lock);
 INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
 INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
 mutex_init(&c->clean_up.lock);
-INIT_LIST_HEAD(&c->jobs);
+INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
 mutex_init(&c->cs_client_mutex);
@@ -3119,7 +3394,7 @@ long gk20a_channel_ioctl(struct file *filp,
 (struct nvgpu_free_obj_ctx_args *)buf);
 gk20a_idle(dev);
 break;
-case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
+case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX:
 err = gk20a_busy(dev);
 if (err) {
 dev_err(dev,

@@ -3128,9 +3403,34 @@ long gk20a_channel_ioctl(struct file *filp,
 break;
 }
 err = gk20a_alloc_channel_gpfifo(ch,
-(struct nvgpu_alloc_gpfifo_args *)buf);
+(struct nvgpu_alloc_gpfifo_ex_args *)buf);
 gk20a_idle(dev);
 break;
+case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
+{
+struct nvgpu_alloc_gpfifo_ex_args alloc_gpfifo_ex_args;
+struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args =
+(struct nvgpu_alloc_gpfifo_args *)buf;

+err = gk20a_busy(dev);
+if (err) {
+dev_err(dev,
+"%s: failed to host gk20a for ioctl cmd: 0x%x",
+__func__, cmd);
+break;
+}

+/* prepare new args structure */
+memset(&alloc_gpfifo_ex_args, 0,
+sizeof(struct nvgpu_alloc_gpfifo_ex_args));
+alloc_gpfifo_ex_args.num_entries =
+alloc_gpfifo_args->num_entries;
+alloc_gpfifo_ex_args.flags = alloc_gpfifo_args->flags;

+err = gk20a_alloc_channel_gpfifo(ch, &alloc_gpfifo_ex_args);
+gk20a_idle(dev);
+break;
+}
 case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
 err = gk20a_ioctl_channel_submit_gpfifo(ch,
 (struct nvgpu_submit_gpfifo_args *)buf);
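The ioctl hunk above keeps the old NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO entry point working by translating its arguments into a zero-initialized nvgpu_alloc_gpfifo_ex_args, so legacy callers implicitly get num_inflight_jobs = 0 and the dynamic allocation path. The sketch below shows that translation pattern in isolation; the struct and function names are illustrative stand-ins.

/*
 * Compatibility-shim sketch: the legacy request is widened into a zeroed
 * copy of the extended request, so one handler serves both entry points
 * and every new field defaults to 0 for old callers. Illustrative names.
 */
#include <string.h>

struct legacy_args   { unsigned int num_entries; unsigned int flags; };
struct extended_args {
    unsigned int num_entries;
    unsigned int num_inflight_jobs;   /* new field, 0 = old behaviour */
    unsigned int flags;
    unsigned int reserved[5];
};

static int handle_extended(const struct extended_args *args)
{
    return args->num_entries ? 0 : -1;   /* stand-in for the real handler */
}

static int handle_legacy(const struct legacy_args *old)
{
    struct extended_args ex;

    memset(&ex, 0, sizeof(ex));          /* new fields default to zero */
    ex.num_entries = old->num_entries;
    ex.flags = old->flags;
    return handle_extended(&ex);
}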
@@ -70,6 +70,22 @@ struct channel_gk20a_job {
 struct list_head list;
 };

+struct channel_gk20a_joblist {
+struct {
+bool enabled;
+unsigned int length;
+unsigned int put;
+unsigned int get;
+struct channel_gk20a_job *jobs;
+struct mutex read_lock;
+} pre_alloc;

+struct {
+struct list_head jobs;
+spinlock_t lock;
+} dynamic;
+};

 struct channel_gk20a_timeout {
 struct delayed_work wq;
 raw_spinlock_t lock;

@@ -115,6 +131,7 @@ struct channel_gk20a {
 bool bound;
 bool first_init;
 bool vpr;
 bool no_block;
 bool cde;
 pid_t pid;
 pid_t tgid;

@@ -123,8 +140,8 @@ struct channel_gk20a {
 int tsgid;
 struct list_head ch_entry; /* channel's entry in TSG */

-struct list_head jobs;
-spinlock_t jobs_lock;
+struct channel_gk20a_joblist joblist;
+struct gk20a_allocator fence_allocator;

 struct vm_gk20a *vm;

@@ -272,7 +289,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 bool force_need_sync_fence);

 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
-struct nvgpu_alloc_gpfifo_args *args);
+struct nvgpu_alloc_gpfifo_ex_args *args);

 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
 void channel_gk20a_disable(struct channel_gk20a *ch);

@@ -284,6 +301,11 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 void channel_gk20a_enable(struct channel_gk20a *ch);
 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);

+bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
+void channel_gk20a_joblist_lock(struct channel_gk20a *c);
+void channel_gk20a_joblist_unlock(struct channel_gk20a *c);
+bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c);

 int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
 int timeslice_period,
 int *__timeslice_timeout, int *__timeslice_scale);
@@ -47,6 +47,11 @@ static void gk20a_fence_free(struct kref *ref)
 #endif
 if (f->semaphore)
 gk20a_semaphore_put(f->semaphore);

+if (f->allocator) {
+if (gk20a_alloc_initialized(f->allocator))
+gk20a_free(f->allocator, (u64)f);
+} else
 kfree(f);
 }

@@ -109,15 +114,66 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
 #endif
 }

+int gk20a_alloc_fence_pool(struct channel_gk20a *c, int count)
+{
+int err;
+size_t size;
+struct gk20a_fence *fence_pool = NULL;

+size = sizeof(struct gk20a_fence);
+if (count <= ULONG_MAX / size) {
+size = count * size;
+fence_pool = vzalloc(size);
+}

+if (!fence_pool)
+return -ENOMEM;

+err = gk20a_lockless_allocator_init(&c->fence_allocator,
+"fence_pool", (u64)fence_pool, size,
+sizeof(struct gk20a_fence), 0);
+if (err)
+goto fail;

+return 0;

+fail:
+vfree(fence_pool);
+return err;
+}

+void gk20a_free_fence_pool(struct channel_gk20a *c)
+{
+if (gk20a_alloc_initialized(&c->fence_allocator)) {
+void *base = (void *)gk20a_alloc_base(&c->fence_allocator);

+gk20a_alloc_destroy(&c->fence_allocator);
+vfree(base);
+}
+}

 struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
 {
-struct gk20a_fence *fence;
+struct gk20a_fence *fence = NULL;

+if (channel_gk20a_is_prealloc_enabled(c)) {
+if (gk20a_alloc_initialized(&c->fence_allocator)) {
+fence = (struct gk20a_fence *)
+gk20a_alloc(&c->fence_allocator,
+sizeof(struct gk20a_fence));

+/* clear the node and reset the allocator pointer */
+if (fence) {
+memset(fence, 0, sizeof(*fence));
+fence->allocator = &c->fence_allocator;
+}
+}
+} else
 fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL);
-if (!fence)
-return NULL;

+if (fence)
 kref_init(&fence->ref);

 return fence;
 }
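gk20a_alloc_fence_pool() above carves all of a channel's fences out of one vzalloc'd region and hands them out through the driver's gk20a_lockless_allocator, with gk20a_fence_free() returning pool-backed fences via f->allocator. The standalone sketch below models the same fixed-pool idea with a plain free list; it is a simplified stand-in and does not reproduce the lockless allocator.

/*
 * Fixed fence pool modelled with a singly linked free list: all nodes are
 * allocated up front, so the hot path never calls a general allocator.
 * Simplified, illustrative stand-in for the pool used above.
 */
#include <stdlib.h>
#include <string.h>

struct fence {
    int refcount;
    struct fence *next_free;    /* free-list linkage, pool-internal */
};

struct fence_pool {
    struct fence *nodes;
    struct fence *free_head;
};

static int fence_pool_init(struct fence_pool *p, size_t count)
{
    size_t i;

    p->nodes = calloc(count, sizeof(*p->nodes));
    if (!p->nodes)
        return -1;
    p->free_head = NULL;
    for (i = 0; i < count; i++) {       /* thread every node onto the list */
        p->nodes[i].next_free = p->free_head;
        p->free_head = &p->nodes[i];
    }
    return 0;
}

static struct fence *fence_alloc(struct fence_pool *p)
{
    struct fence *f = p->free_head;

    if (!f)
        return NULL;                    /* pool exhausted */
    p->free_head = f->next_free;
    memset(f, 0, sizeof(*f));           /* like the memset in the driver */
    f->refcount = 1;
    return f;
}

static void fence_free(struct fence_pool *p, struct fence *f)
{
    f->next_free = p->free_head;
    p->free_head = f;
}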
@@ -3,7 +3,7 @@
 *
 * GK20A Fences
 *
-* Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
+* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,

@@ -45,6 +45,9 @@ struct gk20a_fence {
 struct platform_device *host1x_pdev;
 u32 syncpt_id;
 u32 syncpt_value;

+/* Valid for fences part of a pre-allocated fence pool */
+struct gk20a_allocator *allocator;
 };

 /* Fences can be created from semaphores or syncpoint (id, value) pairs */

@@ -62,7 +65,15 @@ int gk20a_fence_from_syncpt(
 u32 id, u32 value, bool wfi,
 bool need_sync_fence);

-struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c);
+int gk20a_alloc_fence_pool(
+struct channel_gk20a *c,
+int size);

+void gk20a_free_fence_pool(
+struct channel_gk20a *c);

+struct gk20a_fence *gk20a_alloc_fence(
+struct channel_gk20a *c);

 void gk20a_init_fence(struct gk20a_fence *f,
 const struct gk20a_fence_ops *ops,
@@ -963,7 +963,13 @@ struct nvgpu_alloc_gpfifo_args {
 __u32 num_entries;
 #define NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED (1 << 0) /* set owner channel of this gpfifo as a vpr channel */
 __u32 flags;
 };

+struct nvgpu_alloc_gpfifo_ex_args {
+__u32 num_entries;
+__u32 num_inflight_jobs;
+__u32 flags;
+__u32 reserved[5];
+};

 struct gk20a_sync_pt_info {

@@ -1182,6 +1188,8 @@ struct nvgpu_preemption_mode_args {
 _IOWR(NVGPU_IOCTL_MAGIC, 18, struct nvgpu_set_timeout_ex_args)
 #define NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO \
 _IOW(NVGPU_IOCTL_MAGIC, 100, struct nvgpu_alloc_gpfifo_args)
+#define NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX \
+_IOW(NVGPU_IOCTL_MAGIC, 101, struct nvgpu_alloc_gpfifo_ex_args)
 #define NVGPU_IOCTL_CHANNEL_WAIT \
 _IOWR(NVGPU_IOCTL_MAGIC, 102, struct nvgpu_wait_args)
 #define NVGPU_IOCTL_CHANNEL_CYCLE_STATS \