mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
In gk20a_fifo_abort_tsg(), we loop through channels of TSG and call
gk20a_channel_abort() for each channel. This is incorrect since we disable
and preempt each channel separately, whereas we should disable all channels
at once and use TSG specific API to preempt TSG.

Fix this with below sequence:
- gk20a_disable_tsg() to disable all channels
- preempt tsg if required
- for each channel in TSG
  - set has_timedout flag
  - call gk20a_channel_abort_clean_up() to clean up channel state

Also, separate out common gk20a_channel_abort_clean_up() API which can be
called from both channel and TSG abort routines.

In gk20a_channel_abort(), call gk20a_fifo_abort_tsg() if the channel is
part of TSG.

Add new argument "preempt" to gk20a_fifo_abort_tsg() and preempt TSG if
flag is set.

Bug 200205041

Change-Id: I4eff5394d26fbb53996f2d30b35140b75450f338
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1157190
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
3241 lines
80 KiB
C
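The sequence described in the commit message above lives in fifo_gk20a.c, which is not part of this file. The following is only an illustrative sketch of that sequence; the TSG channel-list field names (ch_list/ch_entry), the gk20a_fifo_preempt_tsg() helper, and the per-channel get/put are assumptions based on the surrounding code, not the actual implementation.

/* Sketch only: assumed names marked in the comments. */
void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid, bool preempt)
{
	struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
	struct channel_gk20a *ch;

	/* disable all channels of the TSG at once */
	gk20a_disable_tsg(tsg);

	/* preempt the whole TSG with the TSG-specific API, if requested */
	if (preempt)
		gk20a_fifo_preempt_tsg(g, tsgid); /* assumed helper */

	/* per-channel SW clean-up only; no per-channel HW disable/preempt */
	list_for_each_entry(ch, &tsg->ch_list, ch_entry) { /* assumed fields */
		if (gk20a_channel_get(ch)) {
			ch->has_timedout = true;
			gk20a_channel_abort_clean_up(ch);
			gk20a_channel_put(ch);
		}
	}
}
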
/*
 * GK20A Graphics channel
 *
 * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/nvhost.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/highmem.h> /* need for nvmap.h*/
#include <trace/events/gk20a.h>
#include <linux/scatterlist.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>

#include "debug_gk20a.h"
#include "ctxsw_trace_gk20a.h"

#include "gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "fence_gk20a.h"
#include "semaphore_gk20a.h"

#include "hw_ram_gk20a.h"
#include "hw_fifo_gk20a.h"
#include "hw_pbdma_gk20a.h"
#include "hw_ccsr_gk20a.h"
#include "hw_ltc_gk20a.h"

#define NVMAP_HANDLE_PARAM_SIZE 1

#define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT	64	/* channels */

#define NVGPU_CHANNEL_MIN_TIMESLICE_US 1000
#define NVGPU_CHANNEL_MAX_TIMESLICE_US 50000

static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);

static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e);

static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);

static int channel_gk20a_commit_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_userd(struct channel_gk20a *c);

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);

static int channel_gk20a_update_runlist(struct channel_gk20a *c,
					bool add);
static void gk20a_free_error_notifiers(struct channel_gk20a *ch);

static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);

static void gk20a_channel_clean_up_jobs(struct work_struct *work);

/* allocate GPU channel */
static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
{
	struct channel_gk20a *ch = NULL;
	struct gk20a_platform *platform;

	platform = gk20a_get_platform(f->g->dev);

	mutex_lock(&f->free_chs_mutex);
	if (!list_empty(&f->free_chs)) {
		ch = list_first_entry(&f->free_chs, struct channel_gk20a,
				free_chs);
		list_del(&ch->free_chs);
		WARN_ON(atomic_read(&ch->ref_count));
		WARN_ON(ch->referenceable);
		f->used_channels++;
	}
	mutex_unlock(&f->free_chs_mutex);

	if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
		platform->aggressive_sync_destroy = true;

	return ch;
}

static void free_channel(struct fifo_gk20a *f,
		struct channel_gk20a *ch)
{
	struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);

	trace_gk20a_release_used_channel(ch->hw_chid);
	/* refcount is zero here and channel is in a freed/dead state */
	mutex_lock(&f->free_chs_mutex);
	/* add to head to increase visibility of timing-related bugs */
	list_add(&ch->free_chs, &f->free_chs);
	f->used_channels--;
	mutex_unlock(&f->free_chs_mutex);

	if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
		platform->aggressive_sync_destroy = false;
}

int channel_gk20a_commit_va(struct channel_gk20a *c)
{
	gk20a_dbg_fn("");

	if (!c->inst_block.cpu_va)
		return -ENOMEM;

	gk20a_init_inst_block(&c->inst_block, c->vm,
			c->vm->gmmu_page_sizes[gmmu_page_size_big]);

	return 0;
}

static int channel_gk20a_commit_userd(struct channel_gk20a *c)
{
	u32 addr_lo;
	u32 addr_hi;
	struct gk20a *g = c->g;

	gk20a_dbg_fn("");

	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
	addr_hi = u64_hi32(c->userd_iova);

	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
		c->hw_chid, (u64)c->userd_iova);

	gk20a_mem_wr32(g, &c->inst_block,
		       ram_in_ramfc_w() + ram_fc_userd_w(),
		       (g->mm.vidmem_is_vidmem ?
			pbdma_userd_target_sys_mem_ncoh_f() :
			pbdma_userd_target_vid_mem_f()) |
		       pbdma_userd_addr_f(addr_lo));

	gk20a_mem_wr32(g, &c->inst_block,
		       ram_in_ramfc_w() + ram_fc_userd_hi_w(),
		       pbdma_userd_hi_addr_f(addr_hi));

	return 0;
}

int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
		int timeslice_period,
		int *__timeslice_timeout, int *__timeslice_scale)
{
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
	int value = scale_ptimer(timeslice_period,
			ptimer_scalingfactor10x(platform->ptimer_src_freq));
	int shift = 0;

	/* value field is 8 bits long */
	while (value >= 1 << 8) {
		value >>= 1;
		shift++;
	}

	/* time slice register is only 18bits long */
	if ((value << shift) >= 1<<19) {
		pr_err("Requested timeslice value is clamped to 18 bits\n");
		value = 255;
		shift = 10;
	}

	*__timeslice_timeout = value;
	*__timeslice_scale = shift;

	return 0;
}

static int channel_gk20a_set_schedule_params(struct channel_gk20a *c)
{
	int shift = 0, value = 0;

	gk20a_channel_get_timescale_from_timeslice(c->g,
		c->timeslice_us, &value, &shift);

	/* disable channel */
	c->g->ops.fifo.disable_channel(c);

	/* preempt the channel */
	WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));

	/* set new timeslice */
	gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(),
		value | (shift << 12) |
		fifo_runlist_timeslice_enable_true_f());

	/* enable channel */
	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
		ccsr_channel_enable_set_true_f());

	return 0;
}

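/*
 * Note: the PBDMA acquire timeout below is encoded as mantissa * 2^exp in
 * units of 1024 ns. The code picks the smallest exponent that lets the
 * value fit the 16-bit mantissa, targeting half of the channel watchdog
 * period so the acquire method times out before the watchdog fires.
 */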
u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c)
{
	u32 val, exp, man;
	u64 timeout;
	int val_len;

	val = pbdma_acquire_retry_man_2_f() |
		pbdma_acquire_retry_exp_2_f();

	if (!c->g->timeouts_enabled)
		return val;

	timeout = gk20a_get_channel_watchdog_timeout(c);
	do_div(timeout, 2); /* set acquire timeout to half of channel wdt */
	timeout *= 1000000UL; /* ms -> ns */
	do_div(timeout, 1024); /* in unit of 1024ns */
	val_len = fls(timeout >> 32) + 32;
	if (val_len == 32)
		val_len = fls(timeout);
	if (val_len > 16 + pbdma_acquire_timeout_exp_max_v()) { /* man: 16bits */
		exp = pbdma_acquire_timeout_exp_max_v();
		man = pbdma_acquire_timeout_man_max_v();
	} else if (val_len > 16) {
		exp = val_len - 16;
		man = timeout >> exp;
	} else {
		exp = 0;
		man = timeout;
	}

	val |= pbdma_acquire_timeout_exp_f(exp) |
		pbdma_acquire_timeout_man_f(man) |
		pbdma_acquire_timeout_en_enable_f();

	return val;
}

int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
			u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
{
	struct gk20a *g = c->g;
	struct mem_desc *mem = &c->inst_block;

	gk20a_dbg_fn("");

	gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());

	gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
		pbdma_gp_base_offset_f(
		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));

	gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));

	gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
		 c->g->ops.fifo.get_pbdma_signature(c->g));

	gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
		pbdma_formats_gp_fermi0_f() |
		pbdma_formats_pb_fermi1_f() |
		pbdma_formats_mp_fermi0_f());

	gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
		pbdma_pb_header_priv_user_f() |
		pbdma_pb_header_method_zero_f() |
		pbdma_pb_header_subchannel_zero_f() |
		pbdma_pb_header_level_main_f() |
		pbdma_pb_header_first_true_f() |
		pbdma_pb_header_type_inc_f());

	gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
		pbdma_subdevice_id_f(1) |
		pbdma_subdevice_status_active_f() |
		pbdma_subdevice_channel_dma_enable_f());

	gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());

	gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
		channel_gk20a_pbdma_acquire_val(c));

	gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
		fifo_runlist_timeslice_timeout_128_f() |
		fifo_runlist_timeslice_timescale_3_f() |
		fifo_runlist_timeslice_enable_true_f());

	gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
		fifo_pb_timeslice_timeout_16_f() |
		fifo_pb_timeslice_timescale_0_f() |
		fifo_pb_timeslice_enable_true_f());

	gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));

	return channel_gk20a_commit_userd(c);
}

static int channel_gk20a_setup_userd(struct channel_gk20a *c)
{
	struct gk20a *g = c->g;
	struct mem_desc *mem = &g->fifo.userd;
	u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32);

	gk20a_dbg_fn("");

	gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
	gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
	gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
	gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
	gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
	gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
	gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
	gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
	gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
	gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);

	return 0;
}

static void channel_gk20a_bind(struct channel_gk20a *c)
{
	struct gk20a *g = c->g;
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *engine_info =
		f->engine_info + ENGINE_GR_GK20A;
	u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block)
		>> ram_in_base_shift_v();

	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
		c->hw_chid, inst_ptr);

	c->bound = true;

	gk20a_writel(g, ccsr_channel_r(c->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
		 ~ccsr_channel_runlist_f(~0)) |
		 ccsr_channel_runlist_f(engine_info->runlist_id));

	gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
		ccsr_channel_inst_ptr_f(inst_ptr) |
		(g->mm.vidmem_is_vidmem ?
		 ccsr_channel_inst_target_sys_mem_ncoh_f() :
		 ccsr_channel_inst_target_vid_mem_f()) |
		ccsr_channel_inst_bind_true_f());

	gk20a_writel(g, ccsr_channel_r(c->hw_chid),
		(gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
		 ~ccsr_channel_enable_set_f(~0)) |
		 ccsr_channel_enable_set_true_f());
}

void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
{
	struct gk20a *g = ch_gk20a->g;

	gk20a_dbg_fn("");

	if (ch_gk20a->bound)
		gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
			ccsr_channel_inst_ptr_f(0) |
			ccsr_channel_inst_bind_false_f());

	ch_gk20a->bound = false;
}

int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
{
	int err;

	gk20a_dbg_fn("");

	err = gk20a_alloc_inst_block(g, &ch->inst_block);
	if (err)
		return err;

	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
		ch->hw_chid, gk20a_mm_inst_block_addr(g, &ch->inst_block));

	gk20a_dbg_fn("done");
	return 0;
}

void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
{
	gk20a_free_inst_block(g, &ch->inst_block);
}

static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
{
	return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
}

void channel_gk20a_enable(struct channel_gk20a *ch)
{
	/* enable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_set_true_f());
}

void channel_gk20a_disable(struct channel_gk20a *ch)
{
	/* disable channel */
	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
		gk20a_readl(ch->g,
			ccsr_channel_r(ch->hw_chid)) |
		ccsr_channel_enable_clr_true_f());
}

int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
{
	struct tsg_gk20a *tsg;

	if (gk20a_is_channel_marked_as_tsg(ch)) {
		tsg = &g->fifo.tsg[ch->tsgid];
		gk20a_enable_tsg(tsg);
	} else {
		g->ops.fifo.enable_channel(ch);
	}

	return 0;
}

int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
{
	struct tsg_gk20a *tsg;

	if (gk20a_is_channel_marked_as_tsg(ch)) {
		tsg = &g->fifo.tsg[ch->tsgid];
		gk20a_disable_tsg(tsg);
	} else {
		g->ops.fifo.disable_channel(ch);
	}

	return 0;
}

void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
{
	struct channel_gk20a_job *job, *n;
	bool released_job_semaphore = false;

	/* ensure no fences are pending */
	mutex_lock(&ch->sync_lock);
	if (ch->sync)
		ch->sync->set_min_eq_max(ch->sync);
	mutex_unlock(&ch->sync_lock);

	/* release all job semaphores (applies only to jobs that use
	   semaphore synchronization) */
	mutex_lock(&ch->jobs_lock);
	list_for_each_entry_safe(job, n, &ch->jobs, list) {
		if (job->post_fence->semaphore) {
			gk20a_semaphore_release(job->post_fence->semaphore);
			released_job_semaphore = true;
		}
	}
	mutex_unlock(&ch->jobs_lock);

	if (released_job_semaphore)
		wake_up_interruptible_all(&ch->semaphore_wq);

	gk20a_channel_update(ch, 0);
}

void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
{
	gk20a_dbg_fn("");

	if (gk20a_is_channel_marked_as_tsg(ch))
		return gk20a_fifo_abort_tsg(ch->g, ch->tsgid, channel_preempt);

	/* make sure new kickoffs are prevented */
	ch->has_timedout = true;

	ch->g->ops.fifo.disable_channel(ch);

	if (channel_preempt)
		ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);

	gk20a_channel_abort_clean_up(ch);
}

int gk20a_wait_channel_idle(struct channel_gk20a *ch)
{
	bool channel_idle = false;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));

	do {
		mutex_lock(&ch->jobs_lock);
		channel_idle = list_empty(&ch->jobs);
		mutex_unlock(&ch->jobs_lock);
		if (channel_idle)
			break;

		usleep_range(1000, 3000);
	} while (time_before(jiffies, end_jiffies)
			|| !tegra_platform_is_silicon());

	if (!channel_idle) {
		gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
				ch->hw_chid);
		return -EBUSY;
	}

	return 0;
}

void gk20a_disable_channel(struct channel_gk20a *ch)
{
	gk20a_channel_abort(ch, true);
	channel_gk20a_update_runlist(ch, false);
}

#if defined(CONFIG_GK20A_CYCLE_STATS)

static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
{
	/* disable existing cyclestats buffer */
	mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
	if (ch->cyclestate.cyclestate_buffer_handler) {
		dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
				ch->cyclestate.cyclestate_buffer);
		dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
		ch->cyclestate.cyclestate_buffer_handler = NULL;
		ch->cyclestate.cyclestate_buffer = NULL;
		ch->cyclestate.cyclestate_buffer_size = 0;
	}
	mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
}

static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
		       struct nvgpu_cycle_stats_args *args)
{
	struct dma_buf *dmabuf;
	void *virtual_address;

	/* is it allowed to handle calls for current GPU? */
	if (0 == (ch->g->gpu_characteristics.flags &
			NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS))
		return -ENOSYS;

	if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {

		/* set up new cyclestats buffer */
		dmabuf = dma_buf_get(args->dmabuf_fd);
		if (IS_ERR(dmabuf))
			return PTR_ERR(dmabuf);
		virtual_address = dma_buf_vmap(dmabuf);
		if (!virtual_address)
			return -ENOMEM;

		ch->cyclestate.cyclestate_buffer_handler = dmabuf;
		ch->cyclestate.cyclestate_buffer = virtual_address;
		ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
		return 0;

	} else if (!args->dmabuf_fd &&
			ch->cyclestate.cyclestate_buffer_handler) {
		gk20a_free_cycle_stats_buffer(ch);
		return 0;

	} else if (!args->dmabuf_fd &&
			!ch->cyclestate.cyclestate_buffer_handler) {
		/* no request from GL */
		return 0;

	} else {
		pr_err("channel already has cyclestats buffer\n");
		return -EINVAL;
	}
}

static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client)
		ret = gr_gk20a_css_flush(ch->g, ch->cs_client);
	else
		ret = -EBADF;
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
				u32 dmabuf_fd,
				u32 perfmon_id_count,
				u32 *perfmon_id_start)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client) {
		ret = -EEXIST;
	} else {
		ret = gr_gk20a_css_attach(ch->g,
					dmabuf_fd,
					perfmon_id_count,
					perfmon_id_start,
					&ch->cs_client);
	}
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
{
	int ret;

	mutex_lock(&ch->cs_client_mutex);
	if (ch->cs_client) {
		ret = gr_gk20a_css_detach(ch->g, ch->cs_client);
		ch->cs_client = NULL;
	} else {
		ret = 0;
	}
	mutex_unlock(&ch->cs_client_mutex);

	return ret;
}

static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
			struct nvgpu_cycle_stats_snapshot_args *args)
{
	int ret;

	/* is it allowed to handle calls for current GPU? */
	if (0 == (ch->g->gpu_characteristics.flags &
			NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT))
		return -ENOSYS;

	if (!args->dmabuf_fd)
		return -EINVAL;

	/* handle the command (most frequent cases first) */
	switch (args->cmd) {
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
		ret = gk20a_flush_cycle_stats_snapshot(ch);
		args->extra = 0;
		break;

	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
		ret = gk20a_attach_cycle_stats_snapshot(ch,
						args->dmabuf_fd,
						args->extra,
						&args->extra);
		break;

	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
		ret = gk20a_free_cycle_stats_snapshot(ch);
		args->extra = 0;
		break;

	default:
		pr_err("cyclestats: unknown command %u\n", args->cmd);
		ret = -EINVAL;
		break;
	}

	return ret;
}
#endif

static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
		struct nvgpu_channel_wdt_args *args)
{
	if (args->wdt_status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT)
		ch->wdt_enabled = false;
	else if (args->wdt_status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT)
		ch->wdt_enabled = true;

	return 0;
}

static int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
						u32 level)
{
	struct gk20a *g = ch->g;
	int ret;

	if (gk20a_is_channel_marked_as_tsg(ch)) {
		gk20a_err(dev_from_gk20a(g), "invalid operation for TSG!\n");
		return -EINVAL;
	}

	switch (level) {
	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
		ret = g->ops.fifo.set_runlist_interleave(g, ch->hw_chid,
							false, 0, level);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret ? ret : g->ops.fifo.update_runlist(g, 0, ~0, true, true);
}

static int gk20a_init_error_notifier(struct channel_gk20a *ch,
		struct nvgpu_set_error_notifier *args)
{
	struct device *dev = dev_from_gk20a(ch->g);
	struct dma_buf *dmabuf;
	void *va;
	u64 end = args->offset + sizeof(struct nvgpu_notification);

	if (!args->mem) {
		pr_err("gk20a_init_error_notifier: invalid memory handle\n");
		return -EINVAL;
	}

	dmabuf = dma_buf_get(args->mem);

	if (ch->error_notifier_ref)
		gk20a_free_error_notifiers(ch);

	if (IS_ERR(dmabuf)) {
		pr_err("Invalid handle: %d\n", args->mem);
		return -EINVAL;
	}

	if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) {
		dma_buf_put(dmabuf);
		gk20a_err(dev, "gk20a_init_error_notifier: invalid offset\n");
		return -EINVAL;
	}

	/* map handle */
	va = dma_buf_vmap(dmabuf);
	if (!va) {
		dma_buf_put(dmabuf);
		pr_err("Cannot map notifier handle\n");
		return -ENOMEM;
	}

	/* set channel notifiers pointer */
	ch->error_notifier_ref = dmabuf;
	ch->error_notifier = va + args->offset;
	ch->error_notifier_va = va;
	memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
	return 0;
}

void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
{
	if (ch->error_notifier_ref) {
		struct timespec time_data;
		u64 nsec;
		getnstimeofday(&time_data);
		nsec = ((u64)time_data.tv_sec) * 1000000000u +
				(u64)time_data.tv_nsec;
		ch->error_notifier->time_stamp.nanoseconds[0] =
				(u32)nsec;
		ch->error_notifier->time_stamp.nanoseconds[1] =
				(u32)(nsec >> 32);
		ch->error_notifier->info32 = error;
		ch->error_notifier->status = 0xffff;

		gk20a_err(dev_from_gk20a(ch->g),
		    "error notifier set to %d for ch %d", error, ch->hw_chid);
	}
}

static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
{
	if (ch->error_notifier_ref) {
		dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
		dma_buf_put(ch->error_notifier_ref);
		ch->error_notifier_ref = NULL;
		ch->error_notifier = NULL;
		ch->error_notifier_va = NULL;
	}
}

/* Returns delta of cyclic integers a and b. If a is ahead of b, delta
 * is positive */
static int cyclic_delta(int a, int b)
{
	return a - b;
}

static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
{
	int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
	int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);

	/* wait until all stalling irqs are handled */
	wait_event(g->sw_irq_stall_last_handled_wq,
		   cyclic_delta(stall_irq_threshold,
				atomic_read(&g->sw_irq_stall_last_handled))
		   <= 0);

	/* wait until all non-stalling irqs are handled */
	wait_event(g->sw_irq_nonstall_last_handled_wq,
		   cyclic_delta(nonstall_irq_threshold,
				atomic_read(&g->sw_irq_nonstall_last_handled))
		   <= 0);
}

static void gk20a_wait_until_counter_is_N(
	struct channel_gk20a *ch, atomic_t *counter, int wait_value,
	wait_queue_head_t *wq, const char *caller, const char *counter_name)
{
	while (true) {
		if (wait_event_timeout(
			    *wq,
			    atomic_read(counter) == wait_value,
			    msecs_to_jiffies(5000)) > 0)
			break;

		gk20a_warn(dev_from_gk20a(ch->g),
			   "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
			   caller, ch->hw_chid, counter_name,
			   atomic_read(counter), wait_value);
	}
}

/* call ONLY when no references to the channel exist: after the last put */
static void gk20a_free_channel(struct channel_gk20a *ch)
{
	struct gk20a *g = ch->g;
	struct fifo_gk20a *f = &g->fifo;
	struct gr_gk20a *gr = &g->gr;
	struct vm_gk20a *ch_vm = ch->vm;
	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
	struct dbg_session_gk20a *dbg_s;
	struct dbg_session_data *session_data, *tmp_s;
	struct dbg_session_channel_data *ch_data, *tmp;
	bool was_reset;

	gk20a_dbg_fn("");

	WARN_ON(ch->g == NULL);

	trace_gk20a_free_channel(ch->hw_chid);

	/* abort channel and remove from runlist */
	gk20a_disable_channel(ch);

	/* wait until there's only our ref to the channel */
	gk20a_wait_until_counter_is_N(
		ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
		__func__, "references");

	/* wait until all pending interrupts for recently completed
	 * jobs are handled */
	gk20a_wait_for_deferred_interrupts(g);

	/* prevent new refs */
	spin_lock(&ch->ref_obtain_lock);
	if (!ch->referenceable) {
		spin_unlock(&ch->ref_obtain_lock);
		gk20a_err(dev_from_gk20a(ch->g),
			  "Extra %s() called to channel %u",
			  __func__, ch->hw_chid);
		return;
	}
	ch->referenceable = false;
	spin_unlock(&ch->ref_obtain_lock);

	/* matches with the initial reference in gk20a_open_new_channel() */
	atomic_dec(&ch->ref_count);

	/* wait until no more refs to the channel */
	gk20a_wait_until_counter_is_N(
		ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
		__func__, "references");

	/* if engine reset was deferred, perform it now */
	mutex_lock(&f->deferred_reset_mutex);
	if (g->fifo.deferred_reset_pending) {
		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
			   " deferred, running now");
		was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
		mutex_lock(&g->fifo.gr_reset_mutex);
		/* if lock is already taken, a reset is taking place
		   so no need to repeat */
		if (!was_reset)
			gk20a_fifo_reset_engine(g,
					g->fifo.deferred_fault_engines);
		mutex_unlock(&g->fifo.gr_reset_mutex);
		g->fifo.deferred_fault_engines = 0;
		g->fifo.deferred_reset_pending = false;
	}
	mutex_unlock(&f->deferred_reset_mutex);

	if (!gk20a_channel_as_bound(ch))
		goto unbind;

	gk20a_dbg_info("freeing bound channel context, timeout=%ld",
			timeout);

	gk20a_free_error_notifiers(ch);

	if (g->ops.fecs_trace.unbind_channel)
		g->ops.fecs_trace.unbind_channel(g, ch);

	/* release channel ctx */
	g->ops.gr.free_channel_ctx(ch);

	gk20a_gr_flush_channel_tlb(gr);

	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));

	gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);

	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));

#if defined(CONFIG_GK20A_CYCLE_STATS)
	gk20a_free_cycle_stats_buffer(ch);
	gk20a_free_cycle_stats_snapshot(ch);
#endif

	channel_gk20a_free_priv_cmdbuf(ch);

	/* sync must be destroyed before releasing channel vm */
	mutex_lock(&ch->sync_lock);
	if (ch->sync) {
		gk20a_channel_sync_destroy(ch->sync);
		ch->sync = NULL;
	}
	mutex_unlock(&ch->sync_lock);

	/* release channel binding to the as_share */
	if (ch_vm->as_share)
		gk20a_as_release_share(ch_vm->as_share);
	else
		gk20a_vm_put(ch_vm);

	spin_lock(&ch->update_fn_lock);
	ch->update_fn = NULL;
	ch->update_fn_data = NULL;
	spin_unlock(&ch->update_fn_lock);
	cancel_work_sync(&ch->update_fn_work);

	/* make sure we don't have deferred interrupts pending that
	 * could still touch the channel */
	gk20a_wait_for_deferred_interrupts(g);

unbind:
	if (gk20a_is_channel_marked_as_tsg(ch))
		g->ops.fifo.tsg_unbind_channel(ch);

	g->ops.fifo.unbind_channel(ch);
	g->ops.fifo.free_inst(g, ch);

	ch->vpr = false;
	ch->vm = NULL;

	mutex_lock(&ch->last_submit.fence_lock);
	gk20a_fence_put(ch->last_submit.pre_fence);
	gk20a_fence_put(ch->last_submit.post_fence);
	ch->last_submit.pre_fence = NULL;
	ch->last_submit.post_fence = NULL;
	mutex_unlock(&ch->last_submit.fence_lock);
	WARN_ON(ch->sync);

	/* unlink all debug sessions */
	mutex_lock(&g->dbg_sessions_lock);

	list_for_each_entry_safe(session_data, tmp_s,
				&ch->dbg_s_list, dbg_s_entry) {
		dbg_s = session_data->dbg_s;
		mutex_lock(&dbg_s->ch_list_lock);
		list_for_each_entry_safe(ch_data, tmp,
					&dbg_s->ch_list, ch_entry) {
			if (ch_data->chid == ch->hw_chid)
				dbg_unbind_single_channel_gk20a(dbg_s, ch_data);
		}
		mutex_unlock(&dbg_s->ch_list_lock);
	}

	mutex_unlock(&g->dbg_sessions_lock);

	/* make sure we catch accesses of unopened channels in case
	 * there's non-refcounted channel pointers hanging around */
	ch->g = NULL;
	wmb();

	/* ALWAYS last */
	free_channel(f, ch);
}

/* Try to get a reference to the channel. Return nonzero on success. If fails,
 * the channel is dead or being freed elsewhere and you must not touch it.
 *
 * Always when a channel_gk20a pointer is seen and about to be used, a
 * reference must be held to it - either by you or the caller, which should be
 * documented well or otherwise clearly seen. This usually boils down to the
 * file from ioctls directly, or an explicit get in exception handlers when the
 * channel is found by a hw_chid.
 *
 * Most global functions in this file require a reference to be held by the
 * caller.
 */
struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
					 const char *caller) {
	struct channel_gk20a *ret;

	spin_lock(&ch->ref_obtain_lock);

	if (likely(ch->referenceable)) {
		atomic_inc(&ch->ref_count);
		ret = ch;
	} else
		ret = NULL;

	spin_unlock(&ch->ref_obtain_lock);

	if (ret)
		trace_gk20a_channel_get(ch->hw_chid, caller);

	return ret;
}

void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
{
	trace_gk20a_channel_put(ch->hw_chid, caller);
	atomic_dec(&ch->ref_count);
	wake_up_all(&ch->ref_count_dec_wq);

	/* More puts than gets. Channel is probably going to get
	 * stuck. */
	WARN_ON(atomic_read(&ch->ref_count) < 0);

	/* Also, more puts than gets. ref_count can go to 0 only if
	 * the channel is closing. Channel is probably going to get
	 * stuck. */
	WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
}

void gk20a_channel_close(struct channel_gk20a *ch)
{
	gk20a_free_channel(ch);
}

int gk20a_channel_release(struct inode *inode, struct file *filp)
{
	struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
	struct gk20a *g = ch ? ch->g : NULL;
	int err;

	if (!ch)
		return 0;

	trace_gk20a_channel_release(dev_name(g->dev));

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
			ch->hw_chid);
		return err;
	}
	gk20a_channel_close(ch);
	gk20a_idle(g->dev);

	filp->private_data = NULL;
	return 0;
}

static void gk20a_channel_update_runcb_fn(struct work_struct *work)
{
	struct channel_gk20a *ch =
		container_of(work, struct channel_gk20a, update_fn_work);
	void (*update_fn)(struct channel_gk20a *, void *);
	void *update_fn_data;

	spin_lock(&ch->update_fn_lock);
	update_fn = ch->update_fn;
	update_fn_data = ch->update_fn_data;
	spin_unlock(&ch->update_fn_lock);

	if (update_fn)
		update_fn(ch, update_fn_data);
}

struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
		void (*update_fn)(struct channel_gk20a *, void *),
		void *update_fn_data)
{
	struct channel_gk20a *ch = gk20a_open_new_channel(g);

	if (ch) {
		spin_lock(&ch->update_fn_lock);
		ch->update_fn = update_fn;
		ch->update_fn_data = update_fn_data;
		spin_unlock(&ch->update_fn_lock);
	}

	return ch;
}

struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch;

	gk20a_dbg_fn("");

	ch = allocate_channel(f);
	if (ch == NULL) {
		/* TBD: we want to make this virtualizable */
		gk20a_err(dev_from_gk20a(g), "out of hw chids");
		return NULL;
	}

	trace_gk20a_open_new_channel(ch->hw_chid);

	BUG_ON(ch->g);
	ch->g = g;

	if (g->ops.fifo.alloc_inst(g, ch)) {
		ch->g = NULL;
		free_channel(f, ch);
		gk20a_err(dev_from_gk20a(g),
			   "failed to open gk20a channel, out of inst mem");
		return NULL;
	}

	/* now the channel is in a limbo out of the free list but not marked as
	 * alive and used (i.e. get-able) yet */

	ch->pid = current->pid;

	/* By default, channel is regular (non-TSG) channel */
	ch->tsgid = NVGPU_INVALID_TSG_ID;

	/* reset timeout counter and update timestamp */
	ch->timeout_accumulated_ms = 0;
	ch->timeout_gpfifo_get = 0;
	/* set gr host default timeout */
	ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
	ch->timeout_debug_dump = true;
	ch->has_timedout = false;
	ch->wdt_enabled = true;
	ch->obj_class = 0;
	ch->clean_up.scheduled = false;
	ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
	ch->timeslice_us = g->timeslice_low_priority_us;

	/* The channel is *not* runnable at this point. It still needs to have
	 * an address space bound and allocate a gpfifo and grctx. */

	init_waitqueue_head(&ch->notifier_wq);
	init_waitqueue_head(&ch->semaphore_wq);
	init_waitqueue_head(&ch->submit_wq);

	ch->update_fn = NULL;
	ch->update_fn_data = NULL;
	spin_lock_init(&ch->update_fn_lock);
	INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);

	/* Mark the channel alive, get-able, with 1 initial use
	 * references. The initial reference will be decreased in
	 * gk20a_free_channel() */
	ch->referenceable = true;
	atomic_set(&ch->ref_count, 1);
	wmb();

	return ch;
}

static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
{
	int err;
	struct channel_gk20a *ch;

	gk20a_dbg_fn("");

	trace_gk20a_channel_open(dev_name(g->dev));

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
		return err;
	}
	ch = gk20a_open_new_channel(g);
	gk20a_idle(g->dev);
	if (!ch) {
		gk20a_err(dev_from_gk20a(g),
			"failed to get f");
		return -ENOMEM;
	}

	gk20a_channel_trace_sched_param(
		trace_gk20a_channel_sched_defaults, ch);

	filp->private_data = ch;
	return 0;
}

int gk20a_channel_open(struct inode *inode, struct file *filp)
{
	struct gk20a *g = container_of(inode->i_cdev,
			struct gk20a, channel.cdev);
	int ret;

	gk20a_dbg_fn("start");
	ret = __gk20a_channel_open(g, filp);

	gk20a_dbg_fn("end");
	return ret;
}

int gk20a_channel_open_ioctl(struct gk20a *g,
		struct nvgpu_channel_open_args *args)
{
	int err;
	int fd;
	struct file *file;
	char *name;

	err = get_unused_fd_flags(O_RDWR);
	if (err < 0)
		return err;
	fd = err;

	name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
			dev_name(g->dev), fd);
	if (!name) {
		err = -ENOMEM;
		goto clean_up;
	}

	file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
	kfree(name);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto clean_up;
	}

	err = __gk20a_channel_open(g, file);
	if (err)
		goto clean_up_file;

	fd_install(fd, file);
	args->channel_fd = fd;
	return 0;

clean_up_file:
	fput(file);
clean_up:
	put_unused_fd(fd);
	return err;
}

/* allocate private cmd buffer.
   used for inserting commands before/after user submitted buffers. */
static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
{
	struct device *d = dev_from_gk20a(c->g);
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	u32 size;
	int err = 0;

	/*
	 * Compute the amount of priv_cmdbuf space we need. In general the worst
	 * case is the kernel inserts both a semaphore pre-fence and post-fence.
	 * Any sync-pt fences will take less memory so we can ignore them for
	 * now.
	 *
	 * A semaphore ACQ (fence-wait) is 8 dwords: semaphore_a, semaphore_b,
	 * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10
	 * dwords: all the same as an ACQ plus a non-stalling intr which is
	 * another 2 dwords.
	 *
	 * Lastly the number of gpfifo entries per channel is fixed so at most
	 * we can use 2/3rds of the gpfifo entries (1 pre-fence entry, one
	 * userspace entry, and one post-fence entry). Thus the computation is:
	 *
	 * (gpfifo entry number * (2 / 3) * (8 + 10) * 4 bytes.
	 */
	size = roundup_pow_of_two(c->gpfifo.entry_num *
				  2 * 18 * sizeof(u32) / 3);

	err = gk20a_gmmu_alloc_map(ch_vm, size, &q->mem);
	if (err) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		goto clean_up;
	}

	q->size = q->mem.size / sizeof (u32);

	return 0;

clean_up:
	channel_gk20a_free_priv_cmdbuf(c);
	return err;
}

static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
{
	struct vm_gk20a *ch_vm = c->vm;
	struct priv_cmd_queue *q = &c->priv_cmd_q;

	if (q->size == 0)
		return;

	gk20a_gmmu_unmap_free(ch_vm, &q->mem);

	memset(q, 0, sizeof(struct priv_cmd_queue));
}

/* allocate a cmd buffer with given size. size is number of u32 entries */
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
			     struct priv_cmd_entry **entry)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct priv_cmd_entry *e;
	u32 free_count;
	u32 size = orig_size;

	gk20a_dbg_fn("size %d", orig_size);

	*entry = NULL;

	/* if free space in the end is less than requested, increase the size
	 * to make the real allocated space start from beginning. */
	if (q->put + size > q->size)
		size = orig_size + (q->size - q->put);

	gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
			c->hw_chid, q->get, q->put);

	free_count = (q->size - (q->put - q->get) - 1) % q->size;

	if (size > free_count)
		return -ENOSPC;

	e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
	if (!e) {
		gk20a_err(dev_from_gk20a(c->g),
			"ch %d: fail to allocate priv cmd entry",
			c->hw_chid);
		return -ENOMEM;
	}

	e->size = orig_size;
	e->gp_get = c->gpfifo.get;
	e->gp_put = c->gpfifo.put;
	e->gp_wrap = c->gpfifo.wrap;
	e->mem = &q->mem;

	/* if we have increased size to skip free space in the end, set put
	   to beginning of cmd buffer (0) + size */
	if (size != orig_size) {
		e->off = 0;
		e->gva = q->mem.gpu_va;
		q->put = orig_size;
	} else {
		e->off = q->put;
		e->gva = q->mem.gpu_va + q->put * sizeof(u32);
		q->put = (q->put + orig_size) & (q->size - 1);
	}

	/* we already handled q->put + size > q->size so BUG_ON this */
	BUG_ON(q->put > q->size);

	*entry = e;

	gk20a_dbg_fn("done");

	return 0;
}

/* Don't call this to free an explicit cmd entry.
 * It doesn't update priv_cmd_queue get/put */
static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e)
{
	kfree(e);
}

int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
			       struct nvgpu_alloc_gpfifo_args *args)
{
	struct gk20a *g = c->g;
	struct device *d = dev_from_gk20a(g);
	struct vm_gk20a *ch_vm;
	u32 gpfifo_size;
	int err = 0;

	/* Kernel can insert one extra gpfifo entry before user submitted gpfifos
	   and another one after, for internal usage. Triple the requested size. */
	gpfifo_size = roundup_pow_of_two(args->num_entries * 3);

	if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
		c->vpr = true;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(d,
			    "not bound to an address space at time of gpfifo"
			    " allocation.");
		return -EINVAL;
	}
	ch_vm = c->vm;

	c->cmds_pending = false;
	mutex_lock(&c->last_submit.fence_lock);
	gk20a_fence_put(c->last_submit.pre_fence);
	gk20a_fence_put(c->last_submit.post_fence);
	c->last_submit.pre_fence = NULL;
	c->last_submit.post_fence = NULL;
	mutex_unlock(&c->last_submit.fence_lock);

	c->ramfc.offset = 0;
	c->ramfc.size = ram_in_ramfc_s() / 8;

	if (c->gpfifo.mem.cpu_va) {
		gk20a_err(d, "channel %d :"
			   "gpfifo already allocated", c->hw_chid);
		return -EEXIST;
	}

	err = gk20a_gmmu_alloc_map(ch_vm,
			gpfifo_size * sizeof(struct nvgpu_gpfifo),
			&c->gpfifo.mem);
	if (err) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		goto clean_up;
	}

	c->gpfifo.entry_num = gpfifo_size;
	c->gpfifo.get = c->gpfifo.put = 0;

	gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
		c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);

	channel_gk20a_setup_userd(c);

	err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
					c->gpfifo.entry_num, args->flags);
	if (err)
		goto clean_up_unmap;

	/* TBD: setup engine contexts */

	err = channel_gk20a_alloc_priv_cmdbuf(c);
	if (err)
		goto clean_up_unmap;

	err = channel_gk20a_update_runlist(c, true);
	if (err)
		goto clean_up_unmap;

	g->ops.fifo.bind_channel(c);

	gk20a_dbg_fn("done");
	return 0;

clean_up_unmap:
	gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
clean_up:
	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
	gk20a_err(d, "fail");
	return err;
}

/* Update with this periodically to determine how the gpfifo is draining. */
static inline u32 update_gp_get(struct gk20a *g,
				struct channel_gk20a *c)
{
	u32 new_get = gk20a_bar1_readl(g,
		c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
	if (new_get < c->gpfifo.get)
		c->gpfifo.wrap = !c->gpfifo.wrap;
	c->gpfifo.get = new_get;
	return new_get;
}

static inline u32 gp_free_count(struct channel_gk20a *c)
{
	return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
		c->gpfifo.entry_num;
}

bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
		u32 timeout_delta_ms)
{
	u32 gpfifo_get = update_gp_get(ch->g, ch);
	/* Count consequent timeout isr */
	if (gpfifo_get == ch->timeout_gpfifo_get) {
		/* we didn't advance since previous channel timeout check */
		ch->timeout_accumulated_ms += timeout_delta_ms;
	} else {
		/* first timeout isr encountered */
		ch->timeout_accumulated_ms = timeout_delta_ms;
	}

	ch->timeout_gpfifo_get = gpfifo_get;

	return ch->g->timeouts_enabled &&
		ch->timeout_accumulated_ms > ch->timeout_ms_max;
}

static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch)
{
	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
	return platform->ch_wdt_timeout_ms;
}

static u32 get_gp_free_count(struct channel_gk20a *c)
{
	update_gp_get(c->g, c);
	return gp_free_count(c);
}

static void trace_write_pushbuffer(struct channel_gk20a *c,
				   struct nvgpu_gpfifo *g)
{
	void *mem = NULL;
	unsigned int words;
	u64 offset;
	struct dma_buf *dmabuf = NULL;

	if (gk20a_debug_trace_cmdbuf) {
		u64 gpu_va = (u64)g->entry0 |
			(u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
		int err;

		words = pbdma_gp_entry1_length_v(g->entry1);
		err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
		if (!err)
			mem = dma_buf_vmap(dmabuf);
	}

	if (mem) {
		u32 i;
		/*
		 * Write in batches of 128 as there seems to be a limit
		 * of how much you can output to ftrace at once.
		 */
		for (i = 0; i < words; i += 128U) {
			trace_gk20a_push_cmdbuf(
				dev_name(c->g->dev),
				0,
				min(words - i, 128U),
				offset + i * sizeof(u32),
				mem);
		}
		dma_buf_vunmap(dmabuf, mem);
	}
}

static void trace_write_pushbuffer_range(struct channel_gk20a *c,
					 struct nvgpu_gpfifo *g,
					 struct nvgpu_submit_gpfifo_args *args,
					 int offset,
					 int count)
{
	u32 size;
	int i;
	struct nvgpu_gpfifo *gp;
	bool gpfifo_allocated = false;

	if (!gk20a_debug_trace_cmdbuf)
		return;

	if (!g && !args)
		return;

	if (!g) {
		size = args->num_entries * sizeof(struct nvgpu_gpfifo);
		if (size) {
			g = nvgpu_alloc(size, false);
			if (!g)
				return;

			if (copy_from_user(g,
				(void __user *)(uintptr_t)args->gpfifo, size)) {
				nvgpu_free(g);
				return;
			}
		}
		gpfifo_allocated = true;
	}

	gp = g + offset;
	for (i = 0; i < count; i++, gp++)
		trace_write_pushbuffer(c, gp);

	if (gpfifo_allocated)
		nvgpu_free(g);
}

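/*
 * Channel watchdog: each submitted job arms a delayed work item for the
 * platform's ch_wdt_timeout_ms period (gk20a_channel_timeout_start /
 * _stop below). Completed jobs cancel or re-arm it from the clean-up
 * worker; if it ever fires, gk20a_channel_timeout_handler() dumps debug
 * state and triggers channel/TSG abort or engine recovery.
 */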
static void gk20a_channel_timeout_start(struct channel_gk20a *ch,
		struct channel_gk20a_job *job)
{
	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);

	if (!ch->g->timeouts_enabled || !platform->ch_wdt_timeout_ms)
		return;

	if (!ch->wdt_enabled)
		return;

	mutex_lock(&ch->timeout.lock);

	if (ch->timeout.initialized) {
		mutex_unlock(&ch->timeout.lock);
		return;
	}

	ch->timeout.job = job;
	ch->timeout.initialized = true;
	schedule_delayed_work(&ch->timeout.wq,
	       msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch)));

	mutex_unlock(&ch->timeout.lock);
}

static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
{
	mutex_lock(&ch->timeout.lock);
	if (!ch->timeout.initialized) {
		mutex_unlock(&ch->timeout.lock);
		return;
	}
	mutex_unlock(&ch->timeout.lock);

	cancel_delayed_work_sync(&ch->timeout.wq);

	mutex_lock(&ch->timeout.lock);
	ch->timeout.initialized = false;
	mutex_unlock(&ch->timeout.lock);
}

void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
{
	u32 chid;
	struct fifo_gk20a *f = &g->fifo;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *ch = &f->channel[chid];

		if (gk20a_channel_get(ch)) {
			mutex_lock(&ch->timeout.lock);
			if (!ch->timeout.initialized) {
				mutex_unlock(&ch->timeout.lock);
				gk20a_channel_put(ch);
				continue;
			}
			mutex_unlock(&ch->timeout.lock);

			cancel_delayed_work_sync(&ch->timeout.wq);
			if (!ch->has_timedout)
				schedule_delayed_work(&ch->timeout.wq,
				       msecs_to_jiffies(
				       gk20a_get_channel_watchdog_timeout(ch)));

			gk20a_channel_put(ch);
		}
	}
}

static void gk20a_channel_timeout_handler(struct work_struct *work)
{
	struct channel_gk20a_job *job;
	struct gk20a *g;
	struct channel_gk20a *ch;
	struct channel_gk20a *failing_ch;
	u32 engine_id;
	int id = -1;
	bool is_tsg = false;

	ch = container_of(to_delayed_work(work), struct channel_gk20a,
			timeout.wq);
	ch = gk20a_channel_get(ch);
	if (!ch)
		return;

	g = ch->g;

	/* Need global lock since multiple channels can timeout at a time */
	mutex_lock(&g->ch_wdt_lock);

	gk20a_debug_dump(g->dev);
	gk20a_gr_debug_dump(g->dev);

	/* Get timed out job and reset the timer */
	mutex_lock(&ch->timeout.lock);
	job = ch->timeout.job;
	ch->timeout.initialized = false;
	mutex_unlock(&ch->timeout.lock);

	if (gr_gk20a_disable_ctxsw(g))
		goto fail_unlock;

	if (gk20a_fence_is_expired(job->post_fence))
		goto fail_enable_ctxsw;

	gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out\n",
		ch->hw_chid);

	/* Get failing engine data */
	engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg);

	if (engine_id >= g->fifo.max_engines) {
		/* If no failing engine, abort the channels */
		if (gk20a_is_channel_marked_as_tsg(ch)) {
			struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];

			gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
			gk20a_fifo_abort_tsg(g, ch->tsgid, false);
		} else {
			gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
			gk20a_channel_abort(ch, false);
		}
	} else {
		/* If failing engine, trigger recovery */
		failing_ch = gk20a_channel_get(&g->fifo.channel[id]);
		if (!failing_ch)
			goto fail_enable_ctxsw;

		if (failing_ch->hw_chid != ch->hw_chid) {
			gk20a_channel_timeout_start(ch, job);

			mutex_lock(&failing_ch->timeout.lock);
			failing_ch->timeout.initialized = false;
			mutex_unlock(&failing_ch->timeout.lock);
		}

		gk20a_fifo_recover(g, BIT(engine_id),
			failing_ch->hw_chid, is_tsg,
			true, failing_ch->timeout_debug_dump);

		gk20a_channel_put(failing_ch);
	}

fail_enable_ctxsw:
	gr_gk20a_enable_ctxsw(g);
fail_unlock:
	mutex_unlock(&g->ch_wdt_lock);
	gk20a_channel_put(ch);
}

static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e)
{
	struct priv_cmd_queue *q = &c->priv_cmd_q;
	struct device *d = dev_from_gk20a(c->g);

	if (!e)
		return 0;

	if ((q->get != e->off) && e->off != 0)
		gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);

	q->get = e->off + e->size;
	free_priv_cmdbuf(c, e);

	return 0;
}

static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
{
	mutex_lock(&c->clean_up.lock);

	if (c->clean_up.scheduled) {
		mutex_unlock(&c->clean_up.lock);
		return;
	}

	c->clean_up.scheduled = true;
	schedule_delayed_work(&c->clean_up.wq, 1);

	mutex_unlock(&c->clean_up.lock);
}

static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
				bool wait_for_completion)
{
	if (wait_for_completion)
		cancel_delayed_work_sync(&c->clean_up.wq);

	mutex_lock(&c->clean_up.lock);
	c->clean_up.scheduled = false;
	mutex_unlock(&c->clean_up.lock);
}

static int gk20a_channel_add_job(struct channel_gk20a *c,
				 struct gk20a_fence *pre_fence,
				 struct gk20a_fence *post_fence,
				 struct priv_cmd_entry *wait_cmd,
				 struct priv_cmd_entry *incr_cmd,
				 bool skip_buffer_refcounting)
{
	struct vm_gk20a *vm = c->vm;
	struct channel_gk20a_job *job = NULL;
	struct mapped_buffer_node **mapped_buffers = NULL;
	int err = 0, num_mapped_buffers = 0;

	/* job needs reference to this vm (released in channel_update) */
	gk20a_vm_get(vm);

	if (!skip_buffer_refcounting) {
		err = gk20a_vm_get_buffers(vm, &mapped_buffers,
					&num_mapped_buffers);
		if (err)
			goto err_put_vm;
	}

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job) {
		err = -ENOMEM;
		goto err_put_buffers;
	}

	/* put() is done in gk20a_channel_update() when the job is done */
	c = gk20a_channel_get(c);

	if (c) {
		job->num_mapped_buffers = num_mapped_buffers;
		job->mapped_buffers = mapped_buffers;
		job->pre_fence = gk20a_fence_get(pre_fence);
		job->post_fence = gk20a_fence_get(post_fence);
		job->wait_cmd = wait_cmd;
		job->incr_cmd = incr_cmd;

		gk20a_channel_timeout_start(c, job);

		mutex_lock(&c->jobs_lock);
		list_add_tail(&job->list, &c->jobs);
		mutex_unlock(&c->jobs_lock);
	} else {
		err = -ETIMEDOUT;
		goto err_free_job;
	}

	return 0;

err_free_job:
	kfree(job);
err_put_buffers:
	gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
err_put_vm:
	gk20a_vm_put(vm);

	return err;
}

static void gk20a_channel_clean_up_jobs(struct work_struct *work)
{
	struct channel_gk20a *c = container_of(to_delayed_work(work),
			struct channel_gk20a, clean_up.wq);
	struct vm_gk20a *vm;
	struct channel_gk20a_job *job;
	struct gk20a_platform *platform;
	struct gk20a *g;
	int job_finished = 0;

	c = gk20a_channel_get(c);
	if (!c)
		return;

	if (!c->g->power_on) { /* shutdown case */
		gk20a_channel_put(c);
		return;
	}

	vm = c->vm;
	g = c->g;
	platform = gk20a_get_platform(g->dev);

	gk20a_channel_cancel_job_clean_up(c, false);

	while (1) {
		bool completed;

		mutex_lock(&c->jobs_lock);
		if (list_empty(&c->jobs)) {
			mutex_unlock(&c->jobs_lock);
			break;
		}
		job = list_first_entry(&c->jobs,
				       struct channel_gk20a_job, list);
		mutex_unlock(&c->jobs_lock);

		completed = gk20a_fence_is_expired(job->post_fence);
		if (!completed) {
			gk20a_channel_timeout_start(c, job);
			break;
		}

		gk20a_channel_timeout_stop(c);

		mutex_lock(&c->sync_lock);
		if (c->sync) {
			c->sync->signal_timeline(c->sync);
			if (atomic_dec_and_test(&c->sync->refcount) &&
					platform->aggressive_sync_destroy) {
				gk20a_channel_sync_destroy(c->sync);
				c->sync = NULL;
			}
		} else {
			WARN_ON(1);
		}
		mutex_unlock(&c->sync_lock);

		if (job->num_mapped_buffers)
			gk20a_vm_put_buffers(vm, job->mapped_buffers,
				job->num_mapped_buffers);

		/* Close the fences (this will unref the semaphores and release
		 * them to the pool). */
		gk20a_fence_put(job->pre_fence);
		gk20a_fence_put(job->post_fence);

		/* Free the private command buffers (wait_cmd first and
		 * then incr_cmd i.e. order of allocation) */
		gk20a_free_priv_cmdbuf(c, job->wait_cmd);
		gk20a_free_priv_cmdbuf(c, job->incr_cmd);

		/* job is done. release its vm reference (taken in add_job) */
		gk20a_vm_put(vm);
		/* another bookkeeping taken in add_job. caller must hold a ref
		 * so this wouldn't get freed here. */
		gk20a_channel_put(c);

		mutex_lock(&c->jobs_lock);
		list_del_init(&job->list);
		mutex_unlock(&c->jobs_lock);

		kfree(job);
		job_finished = 1;
		gk20a_idle(g->dev);
	}

	if (job_finished && c->update_fn)
		schedule_work(&c->update_fn_work);

	gk20a_channel_put(c);
}

void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
{
	c = gk20a_channel_get(c);
	if (!c)
		return;

	if (!c->g->power_on) { /* shutdown case */
		gk20a_channel_put(c);
		return;
	}

	update_gp_get(c->g, c);
	wake_up(&c->submit_wq);

	trace_gk20a_channel_update(c->hw_chid);
	gk20a_channel_schedule_job_clean_up(c);

	gk20a_channel_put(c);
}

int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
				struct nvgpu_gpfifo *gpfifo,
				struct nvgpu_submit_gpfifo_args *args,
				u32 num_entries,
				u32 flags,
				struct nvgpu_fence *fence,
				struct gk20a_fence **fence_out,
				bool force_need_sync_fence)
{
	struct gk20a *g = c->g;
	struct device *d = dev_from_gk20a(g);
	int err = 0;
	int start, end;
	int wait_fence_fd = -1;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_fence *pre_fence = NULL;
	struct gk20a_fence *post_fence = NULL;
	/* we might need two extra gpfifo entries - one for pre fence
	 * and one for post fence. */
	const int extra_entries = 2;
	bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
	struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va;
	bool skip_buffer_refcounting = (flags &
			NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
	bool need_sync_fence = false;
	bool new_sync_created = false;

	/*
	 * If user wants to allocate sync_fence_fd always, then respect that;
	 * otherwise, allocate sync_fence_fd based on user flags only
	 */
	if (force_need_sync_fence)
		need_sync_fence = true;

	if (c->has_timedout)
		return -ETIMEDOUT;

	/* fifo not large enough for request. Return error immediately.
	 * Kernel can insert gpfifo entries before and after user gpfifos.
	 * So, add extra_entries in user request. Also, HW with fifo size N
	 * can accept only N-1 entries and so the below condition */
	if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
		gk20a_err(d, "not enough gpfifo space allocated");
		return -ENOMEM;
	}

	if (!gpfifo && !args)
		return -EINVAL;

	if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
		      NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
	    !fence)
		return -EINVAL;

	/* an address space needs to have been bound at this point. */
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(d,
			    "not bound to an address space at time of gpfifo"
			    " submission.");
		return -EINVAL;
	}

#ifdef CONFIG_DEBUG_FS
	/* update debug settings */
	if (g->ops.ltc.sync_debugfs)
		g->ops.ltc.sync_debugfs(g);
#endif

	gk20a_dbg_info("channel %d", c->hw_chid);

	/* gk20a_channel_update releases this ref. */
	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_err(d, "failed to host gk20a to submit gpfifo");
		return err;
	}

	trace_gk20a_channel_submit_gpfifo(dev_name(c->g->dev),
					  c->hw_chid,
					  num_entries,
					  flags,
					  fence ? fence->id : 0,
					  fence ? fence->value : 0);

	gk20a_dbg_info("pre-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	/* Make sure we have enough space for gpfifo entries. If not,
	 * wait for signals from completed submits */
	if (gp_free_count(c) < num_entries + extra_entries) {
		/* we can get here via locked ioctl and other paths too */
		int locked_path = mutex_is_locked(&c->ioctl_lock);
		if (locked_path)
			mutex_unlock(&c->ioctl_lock);

		trace_gk20a_gpfifo_submit_wait_for_space(dev_name(c->g->dev));
		err = wait_event_interruptible(c->submit_wq,
			get_gp_free_count(c) >= num_entries + extra_entries ||
			c->has_timedout);
		trace_gk20a_gpfifo_submit_wait_for_space_done(dev_name(c->g->dev));

		if (locked_path)
			mutex_lock(&c->ioctl_lock);
	}

	if (c->has_timedout) {
		err = -ETIMEDOUT;
		goto clean_up;
	}

	if (err) {
		err = -ENOSPC;
		goto clean_up;
	}

	mutex_lock(&c->sync_lock);
	if (!c->sync) {
		c->sync = gk20a_channel_sync_create(c);
		if (!c->sync) {
			err = -ENOMEM;
			mutex_unlock(&c->sync_lock);
			goto clean_up;
		}
		new_sync_created = true;
	}
	atomic_inc(&c->sync->refcount);
	mutex_unlock(&c->sync_lock);

	if (g->ops.fifo.resetup_ramfc && new_sync_created) {
		err = g->ops.fifo.resetup_ramfc(c);
		if (err) {
			goto clean_up;
		}
	}

	/*
	 * optionally insert syncpt wait in the beginning of gpfifo submission
	 * when user requested and the wait hasn't expired.
	 * validate that the id makes sense, elide if not
|
|
* the only reason this isn't being unceremoniously killed is to
|
|
* keep running some tests which trigger this condition
|
|
*/
|
|
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
|
|
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
|
|
wait_fence_fd = fence->id;
|
|
err = c->sync->wait_fd(c->sync, wait_fence_fd,
|
|
&wait_cmd, &pre_fence);
|
|
} else {
|
|
err = c->sync->wait_syncpt(c->sync, fence->id,
|
|
fence->value, &wait_cmd, &pre_fence);
|
|
}
|
|
}
|
|
if (err) {
|
|
goto clean_up;
|
|
}
|
|
|
|
if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
|
|
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
|
|
need_sync_fence = true;
|
|
|
|
/* always insert syncpt increment at end of gpfifo submission
|
|
to keep track of method completion for idle railgating */
|
|
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
|
|
err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
|
|
&post_fence, need_wfi, need_sync_fence);
|
|
else
|
|
err = c->sync->incr(c->sync, &incr_cmd,
|
|
&post_fence, need_sync_fence);
|
|
if (err) {
|
|
goto clean_up;
|
|
}
|
|
|
|
if (wait_cmd) {
|
|
gpfifo_mem[c->gpfifo.put].entry0 = u64_lo32(wait_cmd->gva);
|
|
gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) |
|
|
pbdma_gp_entry1_length_f(wait_cmd->size);
|
|
trace_gk20a_push_cmdbuf(dev_name(c->g->dev),
|
|
0, wait_cmd->size, 0,
|
|
wait_cmd->mem->cpu_va + wait_cmd->off *
|
|
sizeof(u32));
|
|
|
|
c->gpfifo.put = (c->gpfifo.put + 1) &
|
|
(c->gpfifo.entry_num - 1);
|
|
|
|
/* save gp_put */
|
|
wait_cmd->gp_put = c->gpfifo.put;
|
|
}
|
|
|
|
/*
|
|
* Copy source gpfifo entries into the gpfifo ring buffer,
|
|
* potentially splitting into two memcpies to handle the
|
|
* ring buffer wrap-around case.
|
|
*/
|
|
start = c->gpfifo.put;
|
|
end = start + num_entries;
|
|
|
|
if (gpfifo) {
|
|
if (end > c->gpfifo.entry_num) {
|
|
int length0 = c->gpfifo.entry_num - start;
|
|
int length1 = num_entries - length0;
|
|
|
|
memcpy(gpfifo_mem + start, gpfifo,
|
|
length0 * sizeof(*gpfifo));
|
|
|
|
memcpy(gpfifo_mem, gpfifo + length0,
|
|
length1 * sizeof(*gpfifo));
|
|
|
|
trace_write_pushbuffer_range(c, gpfifo, NULL,
|
|
0, length0);
|
|
trace_write_pushbuffer_range(c, gpfifo, NULL,
|
|
length0, length1);
|
|
} else {
|
|
memcpy(gpfifo_mem + start, gpfifo,
|
|
num_entries * sizeof(*gpfifo));
|
|
|
|
trace_write_pushbuffer_range(c, gpfifo, NULL,
|
|
0, num_entries);
|
|
}
|
|
} else {
|
|
struct nvgpu_gpfifo __user *user_gpfifo =
|
|
(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
|
|
if (end > c->gpfifo.entry_num) {
|
|
int length0 = c->gpfifo.entry_num - start;
|
|
int length1 = num_entries - length0;
|
|
|
|
err = copy_from_user(gpfifo_mem + start,
|
|
user_gpfifo,
|
|
length0 * sizeof(*user_gpfifo));
|
|
if (err) {
|
|
goto clean_up;
|
|
}
|
|
|
|
err = copy_from_user(gpfifo_mem,
|
|
user_gpfifo + length0,
|
|
length1 * sizeof(*user_gpfifo));
|
|
if (err) {
|
|
goto clean_up;
|
|
}
|
|
|
|
trace_write_pushbuffer_range(c, NULL, args,
|
|
0, length0);
|
|
trace_write_pushbuffer_range(c, NULL, args,
|
|
length0, length1);
|
|
} else {
|
|
err = copy_from_user(gpfifo_mem + start,
|
|
user_gpfifo,
|
|
num_entries * sizeof(*user_gpfifo));
|
|
if (err) {
|
|
goto clean_up;
|
|
}
|
|
|
|
trace_write_pushbuffer_range(c, NULL, args,
|
|
0, num_entries);
|
|
}
|
|
}
|
|
|
|
c->gpfifo.put = (c->gpfifo.put + num_entries) &
|
|
(c->gpfifo.entry_num - 1);
|
|
|
|
if (incr_cmd) {
|
|
gpfifo_mem[c->gpfifo.put].entry0 = u64_lo32(incr_cmd->gva);
|
|
gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) |
|
|
pbdma_gp_entry1_length_f(incr_cmd->size);
|
|
trace_gk20a_push_cmdbuf(dev_name(c->g->dev),
|
|
0, incr_cmd->size, 0,
|
|
incr_cmd->mem->cpu_va + incr_cmd->off *
|
|
sizeof(u32));
|
|
|
|
c->gpfifo.put = (c->gpfifo.put + 1) &
|
|
(c->gpfifo.entry_num - 1);
|
|
|
|
/* save gp_put */
|
|
incr_cmd->gp_put = c->gpfifo.put;
|
|
}
|
|
|
|
mutex_lock(&c->last_submit.fence_lock);
|
|
gk20a_fence_put(c->last_submit.pre_fence);
|
|
gk20a_fence_put(c->last_submit.post_fence);
|
|
c->last_submit.pre_fence = pre_fence;
|
|
c->last_submit.post_fence = post_fence;
|
|
if (fence_out)
|
|
*fence_out = gk20a_fence_get(post_fence);
|
|
mutex_unlock(&c->last_submit.fence_lock);
|
|
|
|
/* TODO! Check for errors... */
|
|
gk20a_channel_add_job(c, pre_fence, post_fence,
|
|
wait_cmd, incr_cmd,
|
|
skip_buffer_refcounting);
|
|
|
|
c->cmds_pending = true;
|
|
gk20a_bar1_writel(g,
|
|
c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
|
|
c->gpfifo.put);
|
|
|
|
trace_gk20a_channel_submitted_gpfifo(dev_name(c->g->dev),
|
|
c->hw_chid,
|
|
num_entries,
|
|
flags,
|
|
post_fence->syncpt_id,
|
|
post_fence->syncpt_value);
|
|
|
|
gk20a_dbg_info("post-submit put %d, get %d, size %d",
|
|
c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
|
|
|
|
gk20a_dbg_fn("done");
|
|
return err;
|
|
|
|
clean_up:
|
|
gk20a_dbg_fn("fail");
|
|
free_priv_cmdbuf(c, wait_cmd);
|
|
free_priv_cmdbuf(c, incr_cmd);
|
|
gk20a_fence_put(pre_fence);
|
|
gk20a_fence_put(post_fence);
|
|
gk20a_idle(g->dev);
|
|
return err;
|
|
}
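
/*
 * One-time setup of the per-channel locks, lists and work items at FIFO init
 * time; the channel starts out unbound (c->g == NULL) and is placed on the
 * free channel list.
 */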
int gk20a_init_channel_support(struct gk20a *g, u32 chid)
{
	struct channel_gk20a *c = g->fifo.channel+chid;
	c->g = NULL;
	c->hw_chid = chid;
	c->bound = false;
	spin_lock_init(&c->ref_obtain_lock);
	atomic_set(&c->ref_count, 0);
	c->referenceable = false;
	init_waitqueue_head(&c->ref_count_dec_wq);
	mutex_init(&c->ioctl_lock);
	mutex_init(&c->jobs_lock);
	mutex_init(&c->last_submit.fence_lock);
	mutex_init(&c->timeout.lock);
	mutex_init(&c->sync_lock);
	INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
	mutex_init(&c->clean_up.lock);
	INIT_LIST_HEAD(&c->jobs);
#if defined(CONFIG_GK20A_CYCLE_STATS)
	mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
	mutex_init(&c->cs_client_mutex);
#endif
	INIT_LIST_HEAD(&c->dbg_s_list);
	INIT_LIST_HEAD(&c->event_id_list);
	mutex_init(&c->event_id_list_lock);
	mutex_init(&c->dbg_s_lock);
	list_add(&c->free_chs, &g->fifo.free_chs);

	return 0;
}
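
/*
 * Wait (up to @timeout) for the channel's last submitted post fence to
 * expire. Returns -ETIMEDOUT immediately for a channel that has already been
 * marked as timed out.
 */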
int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
{
	int err = 0;
	struct gk20a_fence *fence;

	if (!ch->cmds_pending)
		return 0;

	mutex_lock(&ch->last_submit.fence_lock);
	fence = ch->last_submit.post_fence;
	if (!fence) {
		mutex_unlock(&ch->last_submit.fence_lock);
		return -EINVAL;
	}
	mutex_unlock(&ch->last_submit.fence_lock);

	/* Do not wait for a timedout channel */
	if (ch->has_timedout)
		return -ETIMEDOUT;

	gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
		     fence->syncpt_value, fence->semaphore);

	err = gk20a_fence_wait(fence, timeout);
	if (WARN_ON(err))
		dev_warn(dev_from_gk20a(ch->g),
			 "timed out waiting for gk20a channel to finish");
	else
		ch->cmds_pending = false;

	return err;
}
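
/*
 * Sleep until the 32-bit semaphore value in the given dma-buf reaches
 * @payload, the channel times out, or @timeout jiffies elapse. The page
 * containing @offset is mapped with dma_buf_kmap() and the value is polled
 * from the channel's semaphore wait queue.
 */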
static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
					ulong id, u32 offset,
					u32 payload, long timeout)
{
	struct device *dev = ch->g->dev;
	struct dma_buf *dmabuf;
	void *data;
	u32 *semaphore;
	int ret = 0;
	long remain;

	/* do not wait if channel has timed out */
	if (ch->has_timedout)
		return -ETIMEDOUT;

	dmabuf = dma_buf_get(id);
	if (IS_ERR(dmabuf)) {
		gk20a_err(dev, "invalid notifier nvmap handle 0x%lx", id);
		return -EINVAL;
	}

	data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
	if (!data) {
		gk20a_err(dev, "failed to map notifier memory");
		ret = -EINVAL;
		goto cleanup_put;
	}

	semaphore = data + (offset & ~PAGE_MASK);

	remain = wait_event_interruptible_timeout(
			ch->semaphore_wq,
			*semaphore == payload || ch->has_timedout,
			timeout);

	if (remain == 0 && *semaphore != payload)
		ret = -ETIMEDOUT;
	else if (remain < 0)
		ret = remain;

	dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
cleanup_put:
	dma_buf_put(dmabuf);
	return ret;
}
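
/*
 * NVGPU_IOCTL_CHANNEL_WAIT backend: block until either a notifier object in
 * a dma-buf is signalled (status cleared to 0) or a semaphore reaches the
 * requested payload, subject to the timeout carried in @args.
 */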
static int gk20a_channel_wait(struct channel_gk20a *ch,
			      struct nvgpu_wait_args *args)
{
	struct device *d = dev_from_gk20a(ch->g);
	struct dma_buf *dmabuf;
	struct notification *notif;
	struct timespec tv;
	u64 jiffies;
	ulong id;
	u32 offset;
	unsigned long timeout;
	int remain, ret = 0;
	u64 end;

	gk20a_dbg_fn("");

	if (ch->has_timedout)
		return -ETIMEDOUT;

	if (args->timeout == NVGPU_NO_TIMEOUT)
		timeout = MAX_SCHEDULE_TIMEOUT;
	else
		timeout = (u32)msecs_to_jiffies(args->timeout);

	switch (args->type) {
	case NVGPU_WAIT_TYPE_NOTIFIER:
		id = args->condition.notifier.dmabuf_fd;
		offset = args->condition.notifier.offset;
		end = offset + sizeof(struct notification);

		dmabuf = dma_buf_get(id);
		if (IS_ERR(dmabuf)) {
			gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
				  id);
			return -EINVAL;
		}

		if (end > dmabuf->size || end < sizeof(struct notification)) {
			dma_buf_put(dmabuf);
			gk20a_err(d, "invalid notifier offset\n");
			return -EINVAL;
		}

		notif = dma_buf_vmap(dmabuf);
		if (!notif) {
			gk20a_err(d, "failed to map notifier memory");
			return -ENOMEM;
		}

		notif = (struct notification *)((uintptr_t)notif + offset);

		/* user should set status pending before
		 * calling this ioctl */
		remain = wait_event_interruptible_timeout(
				ch->notifier_wq,
				notif->status == 0 || ch->has_timedout,
				timeout);

		if (remain == 0 && notif->status != 0) {
			ret = -ETIMEDOUT;
			goto notif_clean_up;
		} else if (remain < 0) {
			ret = -EINTR;
			goto notif_clean_up;
		}

		/* TBD: fill in correct information */
		jiffies = get_jiffies_64();
		jiffies_to_timespec(jiffies, &tv);
		notif->timestamp.nanoseconds[0] = tv.tv_nsec;
		notif->timestamp.nanoseconds[1] = tv.tv_sec;
		notif->info32 = 0xDEADBEEF; /* should be object name */
		notif->info16 = ch->hw_chid; /* should be method offset */

notif_clean_up:
		dma_buf_vunmap(dmabuf, notif);
		return ret;

	case NVGPU_WAIT_TYPE_SEMAPHORE:
		ret = gk20a_channel_wait_semaphore(ch,
				args->condition.semaphore.dmabuf_fd,
				args->condition.semaphore.offset,
				args->condition.semaphore.payload,
				timeout);

		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
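
/*
 * poll()/release() backends for the anonymous event fds handed out by
 * gk20a_channel_event_id_enable(); the fd's private data identifies either a
 * channel or a TSG plus the event id being watched.
 */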
static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait)
{
	unsigned int mask = 0;
	struct gk20a_event_id_data *event_id_data = filep->private_data;
	struct gk20a *g = event_id_data->g;
	u32 event_id = event_id_data->event_id;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");

	poll_wait(filep, &event_id_data->event_id_wq, wait);

	mutex_lock(&event_id_data->lock);

	if (event_id_data->is_tsg) {
		struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;

		gk20a_dbg_info(
			"found pending event_id=%d on TSG=%d\n",
			event_id, tsg->tsgid);
	} else {
		struct channel_gk20a *ch = g->fifo.channel
					   + event_id_data->id;

		gk20a_dbg_info(
			"found pending event_id=%d on chid=%d\n",
			event_id, ch->hw_chid);
	}
	mask = (POLLPRI | POLLIN);

	mutex_unlock(&event_id_data->lock);

	return mask;
}

static int gk20a_event_id_release(struct inode *inode, struct file *filp)
{
	struct gk20a_event_id_data *event_id_data = filp->private_data;
	struct gk20a *g = event_id_data->g;

	if (event_id_data->is_tsg) {
		struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;

		mutex_lock(&tsg->event_id_list_lock);
		list_del_init(&event_id_data->event_id_node);
		mutex_unlock(&tsg->event_id_list_lock);
	} else {
		struct channel_gk20a *ch = g->fifo.channel + event_id_data->id;

		mutex_lock(&ch->event_id_list_lock);
		list_del_init(&event_id_data->event_id_node);
		mutex_unlock(&ch->event_id_list_lock);
	}

	kfree(event_id_data);
	filp->private_data = NULL;

	return 0;
}

const struct file_operations gk20a_event_id_ops = {
	.owner = THIS_MODULE,
	.poll = gk20a_event_id_poll,
	.release = gk20a_event_id_release,
};
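
/* Look up the event bookkeeping for @event_id on this channel; returns 0 and
 * fills *event_id_data on success, -1 if the event has not been enabled. */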
static int gk20a_channel_get_event_data_from_id(struct channel_gk20a *ch,
				int event_id,
				struct gk20a_event_id_data **event_id_data)
{
	struct gk20a_event_id_data *local_event_id_data;
	bool event_found = false;

	mutex_lock(&ch->event_id_list_lock);
	list_for_each_entry(local_event_id_data, &ch->event_id_list,
						 event_id_node) {
		if (local_event_id_data->event_id == event_id) {
			event_found = true;
			break;
		}
	}
	mutex_unlock(&ch->event_id_list_lock);

	if (event_found) {
		*event_id_data = local_event_id_data;
		return 0;
	} else {
		return -1;
	}
}

void gk20a_channel_event_id_post_event(struct channel_gk20a *ch,
				       int event_id)
{
	struct gk20a_event_id_data *event_id_data;
	int err = 0;

	err = gk20a_channel_get_event_data_from_id(ch, event_id,
						&event_id_data);
	if (err)
		return;

	mutex_lock(&event_id_data->lock);

	gk20a_dbg_info(
		"posting event for event_id=%d on ch=%d\n",
		event_id, ch->hw_chid);

	wake_up_interruptible_all(&event_id_data->event_id_wq);

	mutex_unlock(&event_id_data->lock);
}
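
/*
 * Allocate an anonymous inode fd that userspace can poll to be notified of
 * @event_id occurrences on this channel, and link its bookkeeping into the
 * channel's event_id_list. Enabling the same event twice is rejected.
 */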
static int gk20a_channel_event_id_enable(struct channel_gk20a *ch,
					 int event_id,
					 int *fd)
{
	int err = 0;
	int local_fd;
	struct file *file;
	char *name;
	struct gk20a_event_id_data *event_id_data;

	err = gk20a_channel_get_event_data_from_id(ch,
				event_id, &event_id_data);
	if (err == 0) /* We already have event enabled */
		return -EINVAL;

	err = get_unused_fd_flags(O_RDWR);
	if (err < 0)
		return err;
	local_fd = err;

	name = kasprintf(GFP_KERNEL, "nvgpu-event%d-fd%d",
			event_id, local_fd);

	file = anon_inode_getfile(name, &gk20a_event_id_ops,
				NULL, O_RDWR);
	kfree(name);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto clean_up;
	}

	event_id_data = kzalloc(sizeof(*event_id_data), GFP_KERNEL);
	if (!event_id_data) {
		err = -ENOMEM;
		goto clean_up_file;
	}
	event_id_data->g = ch->g;
	event_id_data->id = ch->hw_chid;
	event_id_data->is_tsg = false;
	event_id_data->event_id = event_id;

	init_waitqueue_head(&event_id_data->event_id_wq);
	mutex_init(&event_id_data->lock);
	INIT_LIST_HEAD(&event_id_data->event_id_node);

	mutex_lock(&ch->event_id_list_lock);
	list_add_tail(&event_id_data->event_id_node, &ch->event_id_list);
	mutex_unlock(&ch->event_id_list_lock);

	fd_install(local_fd, file);
	file->private_data = event_id_data;

	*fd = local_fd;

	return 0;

clean_up_file:
	fput(file);
clean_up:
	put_unused_fd(local_fd);
	return err;
}

static int gk20a_channel_event_id_ctrl(struct channel_gk20a *ch,
		struct nvgpu_event_id_ctrl_args *args)
{
	int err = 0;
	int fd = -1;

	if (args->event_id < 0 ||
	    args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
		return -EINVAL;

	if (gk20a_is_channel_marked_as_tsg(ch))
		return -EINVAL;

	switch (args->cmd) {
	case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
		err = gk20a_channel_event_id_enable(ch, args->event_id, &fd);
		if (!err)
			args->event_fd = fd;
		break;

	default:
		gk20a_err(dev_from_gk20a(ch->g),
			  "unrecognized channel event id cmd: 0x%x",
			  args->cmd);
		err = -EINVAL;
		break;
	}

	return err;
}
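
/*
 * Scheduling parameter setters for bare channels; both reject channels that
 * are bound to a TSG. Priority is implemented by picking one of the per-GPU
 * low/medium/high timeslice values and reprogramming the schedule params.
 */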
int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
{
	if (gk20a_is_channel_marked_as_tsg(ch)) {
		gk20a_err(dev_from_gk20a(ch->g),
			"invalid operation for TSG!\n");
		return -EINVAL;
	}

	/* set priority of graphics channel */
	switch (priority) {
	case NVGPU_PRIORITY_LOW:
		ch->timeslice_us = ch->g->timeslice_low_priority_us;
		break;
	case NVGPU_PRIORITY_MEDIUM:
		ch->timeslice_us = ch->g->timeslice_medium_priority_us;
		break;
	case NVGPU_PRIORITY_HIGH:
		ch->timeslice_us = ch->g->timeslice_high_priority_us;
		break;
	default:
		pr_err("Unsupported priority");
		return -EINVAL;
	}

	return channel_gk20a_set_schedule_params(ch);
}

int gk20a_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice)
{
	if (gk20a_is_channel_marked_as_tsg(ch)) {
		gk20a_err(dev_from_gk20a(ch->g),
			"invalid operation for TSG!\n");
		return -EINVAL;
	}

	if (timeslice < NVGPU_CHANNEL_MIN_TIMESLICE_US ||
		timeslice > NVGPU_CHANNEL_MAX_TIMESLICE_US)
		return -EINVAL;

	ch->timeslice_us = timeslice;

	return channel_gk20a_set_schedule_params(ch);
}

static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
			    struct nvgpu_zcull_bind_args *args)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;

	gk20a_dbg_fn("");

	return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
				args->gpu_va, args->mode);
}

/* in this context the "channel" is the host1x channel which
 * maps to *all* gk20a channels */
int gk20a_channel_suspend(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;
	int err;

	gk20a_dbg_fn("");

	/* wait for engine idle */
	err = g->ops.fifo.wait_engine_idle(g);
	if (err)
		return err;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *ch = &f->channel[chid];
		if (gk20a_channel_get(ch)) {
			gk20a_dbg_info("suspend channel %d", chid);
			/* disable channel */
			gk20a_disable_channel_tsg(g, ch);
			/* preempt the channel */
			gk20a_fifo_preempt(g, ch);
			gk20a_channel_cancel_job_clean_up(ch, true);
			/* wait for channel update notifiers */
			if (ch->update_fn)
				cancel_work_sync(&ch->update_fn_work);

			channels_in_use = true;

			gk20a_channel_put(ch);
		}
	}

	if (channels_in_use) {
		g->ops.fifo.update_runlist(g, 0, ~0, false, true);

		for (chid = 0; chid < f->num_channels; chid++) {
			if (gk20a_channel_get(&f->channel[chid])) {
				g->ops.fifo.unbind_channel(&f->channel[chid]);
				gk20a_channel_put(&f->channel[chid]);
			}
		}
	}

	gk20a_dbg_fn("done");
	return 0;
}

int gk20a_channel_resume(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	bool channels_in_use = false;

	gk20a_dbg_fn("");

	for (chid = 0; chid < f->num_channels; chid++) {
		if (gk20a_channel_get(&f->channel[chid])) {
			gk20a_dbg_info("resume channel %d", chid);
			g->ops.fifo.bind_channel(&f->channel[chid]);
			channels_in_use = true;
			gk20a_channel_put(&f->channel[chid]);
		}
	}

	if (channels_in_use)
		g->ops.fifo.update_runlist(g, 0, ~0, true, true);

	gk20a_dbg_fn("done");
	return 0;
}
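
/*
 * Broadcast helper invoked when semaphore-backed work may have completed:
 * flush the FB so CPU reads of semaphore memory are coherent, wake every
 * channel's semaphore waiters, optionally post BLOCKING_SYNC events, and
 * kick gk20a_channel_update() on each referenced channel.
 */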
void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;

	gk20a_dbg_fn("");

	/*
	 * Ensure that all pending writes are actually done before trying to
	 * read semaphore values from DRAM.
	 */
	g->ops.mm.fb_flush(g);

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *c = g->fifo.channel+chid;
		if (gk20a_channel_get(c)) {
			wake_up_interruptible_all(&c->semaphore_wq);
			if (post_events) {
				if (gk20a_is_channel_marked_as_tsg(c)) {
					struct tsg_gk20a *tsg =
						&g->fifo.tsg[c->tsgid];

					gk20a_tsg_event_id_post_event(tsg,
					    NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC);
				} else {
					gk20a_channel_event_id_post_event(c,
					    NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC);
				}
			}
			gk20a_channel_update(c, 0);
			gk20a_channel_put(c);
		}
	}
}
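
/*
 * Thin ioctl wrapper around gk20a_submit_channel_gpfifo(): entries are taken
 * from the user pointer in @args, and the returned post fence is converted
 * to either a sync fence fd or a raw syncpoint id/value pair for userspace.
 */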
static int gk20a_ioctl_channel_submit_gpfifo(
	struct channel_gk20a *ch,
	struct nvgpu_submit_gpfifo_args *args)
{
	struct gk20a_fence *fence_out;
	int ret = 0;

	gk20a_dbg_fn("");

	if (ch->has_timedout)
		return -ETIMEDOUT;

	ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
					  args->flags, &args->fence,
					  &fence_out, false);

	if (ret)
		goto clean_up;

	/* Convert fence_out to something we can pass back to user space. */
	if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
		if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
			int fd = gk20a_fence_install_fd(fence_out);
			if (fd < 0)
				ret = fd;
			else
				args->fence.id = fd;
		} else {
			args->fence.id = fence_out->syncpt_id;
			args->fence.value = fence_out->syncpt_value;
		}
	}
	gk20a_fence_put(fence_out);

clean_up:
	return ret;
}

void gk20a_init_channel(struct gpu_ops *gops)
{
	gops->fifo.bind_channel = channel_gk20a_bind;
	gops->fifo.unbind_channel = channel_gk20a_unbind;
	gops->fifo.disable_channel = channel_gk20a_disable;
	gops->fifo.enable_channel = channel_gk20a_enable;
	gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
	gops->fifo.free_inst = channel_gk20a_free_inst;
	gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
	gops->fifo.channel_set_priority = gk20a_channel_set_priority;
	gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice;
}
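
/*
 * Top-level ioctl dispatcher for the channel device node. Argument data is
 * staged in a stack buffer, a channel reference is held for the duration of
 * the call, and most commands run under ioctl_lock with the GPU kept busy
 * via gk20a_busy()/gk20a_idle().
 */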
long gk20a_channel_ioctl(struct file *filp,
	unsigned int cmd, unsigned long arg)
{
	struct channel_gk20a *ch = filp->private_data;
	struct device *dev = ch->g->dev;
	u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
	int err = 0;

	gk20a_dbg_fn("start %d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
		return -EINVAL;

	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	/* take a ref or return timeout if channel refs can't be taken */
	ch = gk20a_channel_get(ch);
	if (!ch)
		return -ETIMEDOUT;

	/* protect our sanity for threaded userspace - most of the channel is
	 * not thread safe */
	mutex_lock(&ch->ioctl_lock);

	/* this ioctl call keeps a ref to the file which keeps a ref to the
	 * channel */

	switch (cmd) {
	case NVGPU_IOCTL_CHANNEL_OPEN:
		err = gk20a_channel_open_ioctl(ch->g,
			(struct nvgpu_channel_open_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
		break;
	case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.gr.alloc_obj_ctx(ch,
				(struct nvgpu_alloc_obj_ctx_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.gr.free_obj_ctx(ch,
				(struct nvgpu_free_obj_ctx_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_alloc_channel_gpfifo(ch,
				(struct nvgpu_alloc_gpfifo_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
		err = gk20a_ioctl_channel_submit_gpfifo(ch,
				(struct nvgpu_submit_gpfifo_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_WAIT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}

		/* waiting is thread-safe, not dropping this mutex could
		 * deadlock in certain conditions */
		mutex_unlock(&ch->ioctl_lock);

		err = gk20a_channel_wait(ch,
				(struct nvgpu_wait_args *)buf);

		mutex_lock(&ch->ioctl_lock);

		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_zcull_bind(ch,
				(struct nvgpu_zcull_bind_args *)buf);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_init_error_notifier(ch,
				(struct nvgpu_set_error_notifier *)buf);
		gk20a_idle(dev);
		break;
#ifdef CONFIG_GK20A_CYCLE_STATS
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_cycle_stats(ch,
				(struct nvgpu_cycle_stats_args *)buf);
		gk20a_idle(dev);
		break;
#endif
	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
	{
		u32 timeout =
			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
			  timeout, ch->hw_chid);
		ch->timeout_ms_max = timeout;
		gk20a_channel_trace_sched_param(
			trace_gk20a_channel_set_timeout, ch);
		break;
	}
	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
	{
		u32 timeout =
			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
		bool timeout_debug_dump = !((u32)
			((struct nvgpu_set_timeout_ex_args *)buf)->flags &
			(1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
			  timeout, ch->hw_chid);
		ch->timeout_ms_max = timeout;
		ch->timeout_debug_dump = timeout_debug_dump;
		gk20a_channel_trace_sched_param(
			trace_gk20a_channel_set_timeout, ch);
		break;
	}
	case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
		((struct nvgpu_get_param_args *)buf)->value =
			ch->has_timedout;
		break;
	case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.fifo.channel_set_priority(ch,
			((struct nvgpu_set_priority_args *)buf)->priority);

		gk20a_idle(dev);
		gk20a_channel_trace_sched_param(
			trace_gk20a_channel_set_priority, ch);
		break;
	case NVGPU_IOCTL_CHANNEL_ENABLE:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		if (ch->g->ops.fifo.enable_channel)
			ch->g->ops.fifo.enable_channel(ch);
		else
			err = -ENOSYS;
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_DISABLE:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		if (ch->g->ops.fifo.disable_channel)
			ch->g->ops.fifo.disable_channel(ch);
		else
			err = -ENOSYS;
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_PREEMPT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_fifo_preempt(ch->g, ch);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.fifo.force_reset_ch(ch, true);
		gk20a_idle(dev);
		break;
	case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL:
		err = gk20a_channel_event_id_ctrl(ch,
			(struct nvgpu_event_id_ctrl_args *)buf);
		break;
#ifdef CONFIG_GK20A_CYCLE_STATS
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_cycle_stats_snapshot(ch,
				(struct nvgpu_cycle_stats_snapshot_args *)buf);
		gk20a_idle(dev);
		break;
#endif
	case NVGPU_IOCTL_CHANNEL_WDT:
		err = gk20a_channel_set_wdt_status(ch,
				(struct nvgpu_channel_wdt_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_set_runlist_interleave(ch,
			((struct nvgpu_runlist_interleave_args *)buf)->level);

		gk20a_idle(dev);
		gk20a_channel_trace_sched_param(
			trace_gk20a_channel_set_runlist_interleave, ch);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_TIMESLICE:
		err = gk20a_busy(dev);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.fifo.channel_set_timeslice(ch,
			((struct nvgpu_timeslice_args *)buf)->timeslice_us);

		gk20a_idle(dev);
		gk20a_channel_trace_sched_param(
			trace_gk20a_channel_set_timeslice, ch);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE:
		if (ch->g->ops.gr.set_preemption_mode) {
			err = gk20a_busy(dev);
			if (err) {
				dev_err(dev,
					"%s: failed to host gk20a for ioctl cmd: 0x%x",
					__func__, cmd);
				break;
			}
			err = ch->g->ops.gr.set_preemption_mode(ch,
				((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode,
				((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode);
			gk20a_idle(dev);
		} else {
			err = -EINVAL;
		}
		break;
	default:
		dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd);
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));

	mutex_unlock(&ch->ioctl_lock);

	gk20a_channel_put(ch);

	gk20a_dbg_fn("end");

	return err;
}